merge with master

2024-02-02 13:53:25 -08:00 · 2024-02-02 13:53:25 -08:00 · 51765cfe97
parent 7fdd5afab3 9d410496c0
commit 51765cfe97
29 changed files with 596 additions and 27 deletions
--- a/.github/ISSUE_TEMPLATE/01_bug.yml
+++ b/.github/ISSUE_TEMPLATE/01_bug.yml
@ -5,11 +5,8 @@ labels: [possible bug]
 body:
  - type: markdown
    attributes:
-      value: |
-      Use this template to file a bug report for AnythingLLM. Please be as descriptive as possible to allow everyone to replicate and solve your issue.
-
-      Want help contributing a PR? Use our repo chatbot by OnboardAI! https://learnthisrepo.com/anythingllm
-  
+      value: |
+        Use this template to file a bug report for AnythingLLM. Please be as descriptive as possible to allow everyone to replicate and solve your issue. Want help contributing a PR? Use our repo chatbot by OnboardAI! https://learnthisrepo.com/anythingllm
  - type: dropdown
    id: runtime
    attributes:
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -1,6 +1,7 @@
 {
  "cSpell.words": [
    "anythingllm",
+    "Astra",
    "Dockerized",
    "Embeddable",
    "hljs",
--- a/BARE_METAL.md
+++ b/BARE_METAL.md
@ -23,7 +23,12 @@ Here you can find the scripts and known working process to run AnythingLLM outsi

 2. `cd anything-llm` and run `yarn setup`. This will install all dependencies to run in production as well as debug the application.

-3. `cp server/.env.example server/.env` to create the basic ENV file for where instance settings will be read from on service start. This file is automatically managed and should not be edited manually.
+3. `cp server/.env.example server/.env` to create the basic ENV file for where instance settings will be read from on service start.
+
+4. Ensure that the `server/.env` file has _at least_ these keys to start. These values will persist and this file will be automatically written and managed after your first successful boot.
+```
+STORAGE_DIR="/your/absolute/path/to/server/storage"
+```

 ## To start the application

@ -45,10 +50,10 @@ cd server && npx prisma migrate deploy --schema=./prisma/schema.prisma
 ```

 4. Boot the server in production
-`cd server && NODE_ENV=production index.js &` 
+`cd server && NODE_ENV=production node index.js &` 

 5. Boot the collection in another process
-`cd collector && NODE_ENV=production index.js &` 
+`cd collector && NODE_ENV=production node index.js &` 

 AnythingLLM should now be running on `http://localhost:3001`!

--- a/README.md
+++ b/README.md
@ -84,6 +84,7 @@ Some cool features of AnythingLLM
 **Supported Vector Databases:**

 - [LanceDB](https://github.com/lancedb/lancedb) (default)
+- [Astra DB](https://www.datastax.com/products/datastax-astra)
 - [Pinecone](https://pinecone.io)
 - [Chroma](https://trychroma.com)
 - [Weaviate](https://weaviate.io)
--- a/docker/.env.example
+++ b/docker/.env.example
@ -54,6 +54,7 @@ GID='1000'
 # Only used if you are using an LLM that does not natively support embedding (openai or Azure)
 # EMBEDDING_ENGINE='openai'
 # OPEN_AI_KEY=sk-xxxx
+# EMBEDDING_MODEL_PREF='text-embedding-ada-002'

 # EMBEDDING_ENGINE='azure'
 # AZURE_OPENAI_ENDPOINT=
@ -103,6 +104,11 @@ GID='1000'
 # ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com"
 # ZILLIZ_API_TOKEN=api-token-here

+# Enable all below if you are using vector database: Astra DB.
+# VECTOR_DB="astra"
+# ASTRA_DB_APPLICATION_TOKEN=
+# ASTRA_DB_ENDPOINT=
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.

--- a/frontend/src/components/EmbeddingSelection/OpenAiOptions/index.jsx
+++ b/frontend/src/components/EmbeddingSelection/OpenAiOptions/index.jsx
@ -22,12 +22,27 @@ export default function OpenAiOptions({ settings }) {
            Model Preference
          </label>
          <select
-            disabled={true}
-            className="cursor-not-allowed bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+            name="EmbeddingModelPref"
+            required={true}
+            className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
          >
-            <option disabled={true} selected={true}>
-              text-embedding-ada-002
-            </option>
+            <optgroup label="Available embedding models">
+              {[
+                "text-embedding-ada-002",
+                "text-embedding-3-small",
+                "text-embedding-3-large",
+              ].map((model) => {
+                return (
+                  <option
+                    key={model}
+                    value={model}
+                    selected={settings?.EmbeddingModelPref === model}
+                  >
+                    {model}
+                  </option>
+                );
+              })}
+            </optgroup>
          </select>
        </div>
      </div>
--- a/frontend/src/components/LLMSelection/OpenAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OpenAiOptions/index.jsx
@ -85,6 +85,7 @@ function OpenAIModelSelection({ apiKey, settings }) {
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-1106",
            "gpt-4",
+            "gpt-4-turbo-preview",
            "gpt-4-1106-preview",
            "gpt-4-32k",
          ].map((model) => {
--- a/frontend/src/components/Modals/MangeWorkspace/Documents/index.jsx
+++ b/frontend/src/components/Modals/MangeWorkspace/Documents/index.jsx
@ -6,9 +6,14 @@ import Directory from "./Directory";
 import showToast from "../../../../utils/toast";
 import WorkspaceDirectory from "./WorkspaceDirectory";

-// OpenAI Cost per token for text-ada-embedding
+// OpenAI Cost per token
 // ref: https://openai.com/pricing#:~:text=%C2%A0/%201K%20tokens-,Embedding%20models,-Build%20advanced%20search
-const COST_PER_TOKEN = 0.0000001; // $0.0001 / 1K tokens
+
+const MODEL_COSTS = {
+  "text-embedding-ada-002": 0.0000001, // $0.0001 / 1K tokens
+  "text-embedding-3-small": 0.00000002, // $0.00002 / 1K tokens
+  "text-embedding-3-large": 0.00000013, // $0.00013 / 1K tokens
+};

 export default function DocumentSettings({
  workspace,
@ -142,10 +147,12 @@ export default function DocumentSettings({
    });

    // Do not do cost estimation unless the embedding engine is OpenAi.
-    if (
-      !systemSettings?.EmbeddingEngine ||
-      systemSettings.EmbeddingEngine === "openai"
-    ) {
+    if (systemSettings?.EmbeddingEngine === "openai") {
+      const COST_PER_TOKEN =
+        MODEL_COSTS[
+          systemSettings?.EmbeddingModelPref || "text-embedding-ada-002"
+        ];
+
      const dollarAmount = (totalTokenCount / 1000) * COST_PER_TOKEN;
      setEmbeddingsCost(dollarAmount);
    }
--- a/frontend/src/components/Modals/MangeWorkspace/Settings/ChatModelPreference/useGetProviderModels.js
+++ b/frontend/src/components/Modals/MangeWorkspace/Settings/ChatModelPreference/useGetProviderModels.js
@ -8,6 +8,7 @@ const PROVIDER_DEFAULT_MODELS = {
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-1106",
    "gpt-4",
+    "gpt-4-turbo-preview",
    "gpt-4-1106-preview",
    "gpt-4-32k",
  ],
--- a/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx
+++ b/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx
@ -44,6 +44,7 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
  const formEl = useRef(null);
  const [saving, setSaving] = useState(false);
  const [hasChanges, setHasChanges] = useState(false);
+  const [deleting, setDeleting] = useState(false);
  const defaults = recommendedSettings(settings?.LLMProvider);

  const handleUpdate = async (e) => {
@ -72,7 +73,15 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
      )
    )
      return false;
-    await Workspace.delete(workspace.slug);
+
+    setDeleting(true);
+    const success = await Workspace.delete(workspace.slug);
+    if (!success) {
+      showToast("Workspace could not be deleted!", "error", { clear: true });
+      setDeleting(false);
+      return;
+    }
+
    workspace.slug === slug
      ? (window.location = paths.home())
      : window.location.reload();
@ -310,7 +319,11 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
        </div>
      </div>
      <div className="flex items-center justify-between p-2 md:p-6 space-x-2 border-t rounded-b border-gray-600">
-        <DeleteWorkspace workspace={workspace} onClick={deleteWorkspace} />
+        <DeleteWorkspace
+          deleting={deleting}
+          workspace={workspace}
+          onClick={deleteWorkspace}
+        />
        {hasChanges && (
          <button
            type="submit"
@ -324,7 +337,7 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
  );
 }

-function DeleteWorkspace({ workspace, onClick }) {
+function DeleteWorkspace({ deleting, workspace, onClick }) {
  const [canDelete, setCanDelete] = useState(false);
  useEffect(() => {
    async function fetchKeys() {
@ -337,11 +350,12 @@ function DeleteWorkspace({ workspace, onClick }) {
  if (!canDelete) return null;
  return (
    <button
+      disabled={deleting}
      onClick={onClick}
      type="button"
-      className="transition-all duration-300 border border-transparent rounded-lg whitespace-nowrap text-sm px-5 py-2.5 focus:z-10 bg-transparent text-white hover:text-white hover:bg-red-600"
+      className="transition-all duration-300 border border-transparent rounded-lg whitespace-nowrap text-sm px-5 py-2.5 focus:z-10 bg-transparent text-white hover:text-white hover:bg-red-600 disabled:bg-red-600 disabled:text-red-200 disabled:animate-pulse"
    >
-      Delete Workspace
+      {deleting ? "Deleting Workspace..." : "Delete Workspace"}
    </button>
  );
 }
--- a/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx
+++ b/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx
@ -0,0 +1,41 @@
+/**
+ * Form fields for the Astra DB connection: the API endpoint and the
+ * application token. When a token is already saved, a fixed 20-character mask
+ * is rendered in place of its value.
+ */
+export default function AstraDBOptions({ settings }) {
+  const labelClass = "text-white text-sm font-semibold block mb-4";
+  const inputClass =
+    "bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5";
+  const maskedToken = settings?.AstraDBApplicationToken ? "*".repeat(20) : "";
+
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className={labelClass}>Astra DB Endpoint</label>
+          <input
+            type="url"
+            name="AstraDBEndpoint"
+            className={inputClass}
+            placeholder="Astra DB API endpoint"
+            defaultValue={settings?.AstraDBEndpoint}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+
+        <div className="flex flex-col w-60">
+          <label className={labelClass}>Astra DB Application Token</label>
+          <input
+            type="password"
+            name="AstraDBApplicationToken"
+            className={inputClass}
+            placeholder="AstraCS:..."
+            defaultValue={maskedToken}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
--- a/frontend/src/media/vectordbs/astraDB.png
+++ b/frontend/src/media/vectordbs/astraDB.png
--- a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
+++ b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
@ -10,6 +10,7 @@ import WeaviateLogo from "@/media/vectordbs/weaviate.png";
 import QDrantLogo from "@/media/vectordbs/qdrant.png";
 import MilvusLogo from "@/media/vectordbs/milvus.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
+import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import PreLoader from "@/components/Preloader";
 import ChangeWarningModal from "@/components/ChangeWarning";
 import { MagnifyingGlass } from "@phosphor-icons/react";
@ -23,6 +24,7 @@ import MilvusDBOptions from "@/components/VectorDBSelection/MilvusDBOptions";
 import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions";
 import { useModal } from "@/hooks/useModal";
 import ModalWrapper from "@/components/ModalWrapper";
+import AstraDBOptions from "@/components/VectorDBSelection/AstraDBOptions";

 export default function GeneralVectorDatabase() {
  const [saving, setSaving] = useState(false);
@ -100,6 +102,13 @@ export default function GeneralVectorDatabase() {
      options: <MilvusDBOptions settings={settings} />,
      description: "Open-source, highly scalable, and blazing fast.",
    },
+    {
+      name: "AstraDB",
+      value: "astra",
+      logo: AstraDBLogo,
+      options: <AstraDBOptions settings={settings} />,
+      description: "Vector Search for Real-world GenAI.",
+    },
  ];

  const updateVectorChoice = (selection) => {
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@ -11,6 +11,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
+import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import ChromaLogo from "@/media/vectordbs/chroma.png";
 import PineconeLogo from "@/media/vectordbs/pinecone.png";
 import LanceDbLogo from "@/media/vectordbs/lancedb.png";
@ -147,6 +148,13 @@ const VECTOR_DB_PRIVACY = {
    ],
    logo: ZillizLogo,
  },
+  astra: {
+    name: "AstraDB",
+    description: [
+      "Your vectors and document text are stored on your cloud AstraDB database.",
+    ],
+    logo: AstraDBLogo,
+  },
  lancedb: {
    name: "LanceDB",
    description: [
--- a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx
@ -7,6 +7,7 @@ import WeaviateLogo from "@/media/vectordbs/weaviate.png";
 import QDrantLogo from "@/media/vectordbs/qdrant.png";
 import MilvusLogo from "@/media/vectordbs/milvus.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
+import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import System from "@/models/system";
 import paths from "@/utils/paths";
 import PineconeDBOptions from "@/components/VectorDBSelection/PineconeDBOptions";
@ -16,6 +17,7 @@ import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions"
 import LanceDBOptions from "@/components/VectorDBSelection/LanceDBOptions";
 import MilvusOptions from "@/components/VectorDBSelection/MilvusDBOptions";
 import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions";
+import AstraDBOptions from "@/components/VectorDBSelection/AstraDBOptions";
 import showToast from "@/utils/toast";
 import { useNavigate } from "react-router-dom";
 import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem";
@ -100,6 +102,13 @@ export default function VectorDatabaseConnection({
      options: <MilvusOptions settings={settings} />,
      description: "Open-source, highly scalable, and blazing fast.",
    },
+    {
+      name: "AstraDB",
+      value: "astra",
+      logo: AstraDBLogo,
+      options: <AstraDBOptions settings={settings} />,
+      description: "Vector Search for Real-world GenAI.",
+    },
  ];

  function handleForward() {
--- a/server/.env.example
+++ b/server/.env.example
@ -51,6 +51,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # Only used if you are using an LLM that does not natively support embedding (openai or Azure)
 # EMBEDDING_ENGINE='openai'
 # OPEN_AI_KEY=sk-xxxx
+# EMBEDDING_MODEL_PREF='text-embedding-ada-002'

 # EMBEDDING_ENGINE='azure'
 # AZURE_OPENAI_ENDPOINT=
@ -76,6 +77,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # PINECONE_API_KEY=
 # PINECONE_INDEX=

+# Enable all below if you are using vector database: Astra DB.
+# VECTOR_DB="astra"
+# ASTRA_DB_APPLICATION_TOKEN=
+# ASTRA_DB_ENDPOINT=
+
 # Enable all below if you are using vector database: LanceDB.
 VECTOR_DB="lancedb"

--- a/server/models/documents.js
+++ b/server/models/documents.js
@ -106,6 +106,9 @@ const Document = {
        await prisma.workspace_documents.delete({
          where: { id: document.id, workspaceId: workspace.id },
        });
+        await prisma.document_vectors.deleteMany({
+          where: { docId: document.docId },
+        });
      } catch (error) {
        console.error(error.message);
      }
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@ -68,6 +68,12 @@ const SystemSettings = {
            ZillizApiToken: process.env.ZILLIZ_API_TOKEN,
          }
        : {}),
+      ...(vectorDB === "astra"
+        ? {
+            AstraDBApplicationToken: process?.env?.ASTRA_DB_APPLICATION_TOKEN,
+            AstraDBEndpoint: process?.env?.ASTRA_DB_ENDPOINT,
+          }
+        : {}),
      LLMProvider: llmProvider,
      ...(llmProvider === "openai"
        ? {
--- a/server/models/workspace.js
+++ b/server/models/workspace.js
@ -3,6 +3,7 @@ const slugify = require("slugify");
 const { Document } = require("./documents");
 const { WorkspaceUser } = require("./workspaceUsers");
 const { ROLES } = require("../utils/middleware/multiUserProtected");
+const { v4: uuidv4 } = require("uuid");

 const Workspace = {
  writable: [
@ -22,6 +23,7 @@ const Workspace = {
  new: async function (name = null, creatorId = null) {
    if (!name) return { result: null, message: "name cannot be null" };
    var slug = slugify(name, { lower: true });
+    slug = slug || uuidv4();

    const existingBySlug = await this.get({ slug });
    if (existingBySlug !== null) {
--- a/server/package.json
+++ b/server/package.json
@ -22,6 +22,7 @@
  "dependencies": {
    "@anthropic-ai/sdk": "^0.8.1",
    "@azure/openai": "1.0.0-beta.10",
+    "@datastax/astra-db-ts": "^0.1.3",
    "@google/generative-ai": "^0.1.3",
    "@googleapis/youtube": "^9.0.0",
    "@pinecone-database/pinecone": "^2.0.1",
--- a/server/utils/AiProviders/openAi/index.js
+++ b/server/utils/AiProviders/openAi/index.js
@ -52,6 +52,8 @@ class OpenAiLLM {
        return 8192;
      case "gpt-4-1106-preview":
        return 128000;
+      case "gpt-4-turbo-preview":
+        return 128000;
      case "gpt-4-32k":
        return 32000;
      default:
@ -65,6 +67,7 @@ class OpenAiLLM {
      "gpt-3.5-turbo",
      "gpt-3.5-turbo-1106",
      "gpt-4-1106-preview",
+      "gpt-4-turbo-preview",
      "gpt-4-32k",
    ];
    const isPreset = validModels.some((model) => modelName === model);
--- a/server/utils/EmbeddingEngines/openAi/index.js
+++ b/server/utils/EmbeddingEngines/openAi/index.js
@ -9,6 +9,7 @@ class OpenAiEmbedder {
    });
    const openai = new OpenAIApi(config);
    this.openai = openai;
+    this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";

    // Limit of how many strings we can process in a single pass to stay with resource or network limits
    this.maxConcurrentChunks = 500;
@ -30,7 +31,7 @@ class OpenAiEmbedder {
        new Promise((resolve) => {
          this.openai
            .createEmbedding({
-              model: "text-embedding-ada-002",
+              model: this.model,
              input: chunk,
            })
            .then((res) => {
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@ -269,6 +269,7 @@ function handleStreamResponses(response, stream, responseProps) {
        for (const choice of event.choices) {
          const delta = choice.delta?.content;
          if (!delta) continue;
+          fullText += delta;
          writeResponseChunk(response, {
            uuid,
            sources: [],
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@ -22,6 +22,9 @@ function getVectorDbClass() {
    case "zilliz":
      const { Zilliz } = require("../vectorDbProviders/zilliz");
      return Zilliz;
+    case "astra":
+      const { AstraDB } = require("../vectorDbProviders/astra");
+      return AstraDB;
    default:
      throw new Error("ENV: No VECTOR_DB value found in environment!");
  }
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@ -204,6 +204,17 @@ const KEY_MAPPING = {
    checks: [isNotEmpty],
  },

+  // Astra DB Options
+
+  AstraDBApplicationToken: {
+    envKey: "ASTRA_DB_APPLICATION_TOKEN",
+    checks: [isNotEmpty],
+  },
+  AstraDBEndpoint: {
+    envKey: "ASTRA_DB_ENDPOINT",
+    checks: [isNotEmpty],
+  },
+
  // Together Ai Options
  TogetherAiApiKey: {
    envKey: "TOGETHER_AI_API_KEY",
@ -322,6 +333,7 @@ function supportedVectorDB(input = "") {
    "qdrant",
    "milvus",
    "zilliz",
+    "astra",
  ];
  return supported.includes(input)
    ? null
--- a/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md
+++ b/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md
@ -0,0 +1,22 @@
+# How to setup Astra Vector Database for AnythingLLM
+
+[Official Astra DB Docs](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html) for reference.
+
+### How to get started
+
+**Requirements**
+
+- Astra Vector Database with active status.
+
+**Instructions**
+
+- [Create an Astra account or sign in to an existing Astra account](https://astra.datastax.com)
+- Create an Astra Serverless (Vector) Database.
+- Make sure the database is in an active state.
+- Get the `API ENDPOINT` and `Application Token` from the Overview screen.
+
+```
+VECTOR_DB="astra"
+ASTRA_DB_ENDPOINT=Astra DB API endpoint
+ASTRA_DB_APPLICATION_TOKEN=AstraCS:..
+```
--- a/server/utils/vectorDbProviders/astra/index.js
+++ b/server/utils/vectorDbProviders/astra/index.js
@ -0,0 +1,380 @@
+const { AstraDB: AstraClient } = require("@datastax/astra-db-ts");
+const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { storeVectorResult, cachedVectorInformation } = require("../../files");
+const { v4: uuidv4 } = require("uuid");
+const {
+  toChunks,
+  getLLMProvider,
+  getEmbeddingEngineSelection,
+} = require("../../helpers");
+
+const AstraDB = {
+  name: "AstraDB",
+  /**
+   * Creates an Astra client from the ASTRA_DB_APPLICATION_TOKEN and
+   * ASTRA_DB_ENDPOINT environment variables.
+   * @returns {Promise<{client: object}>} object wrapping the new client.
+   * @throws {Error} if VECTOR_DB is not set to "astra".
+   */
+  connect: async function () {
+    const { VECTOR_DB, ASTRA_DB_APPLICATION_TOKEN, ASTRA_DB_ENDPOINT } =
+      process.env;
+    if (VECTOR_DB !== "astra") {
+      throw new Error("AstraDB::Invalid ENV settings");
+    }
+
+    const client = new AstraClient(
+      ASTRA_DB_APPLICATION_TOKEN,
+      ASTRA_DB_ENDPOINT
+    );
+    return { client };
+  },
+  /**
+   * Reports the current time; no request is made to Astra.
+   * @returns {Promise<{heartbeat: number}>} current epoch time in milliseconds.
+   */
+  heartbeat: async function () {
+    const now = new Date();
+    return { heartbeat: now.getTime() };
+  },
+  // The Astra interface hands back a collection object even when the collection
+  // does not exist, so existence is probed with a document count: the count
+  // rejects for a missing collection. Cheaper than listing every collection.
+  /**
+   * @param {object|null} astraCollection - collection handle to probe.
+   * @returns {Promise<boolean>} true when counting documents succeeds.
+   */
+  isRealCollection: async function (astraCollection = null) {
+    if (!astraCollection) return false;
+    const probe = astraCollection.countDocuments();
+    try {
+      await probe;
+      return true;
+    } catch {
+      return false;
+    }
+  },
+  /**
+   * Sums the document counts of every collection reported by allNamespaces.
+   * Collections that cannot be opened or counted contribute zero.
+   * @returns {Promise<number>} total number of stored vectors.
+   */
+  totalVectors: async function () {
+    const { client } = await this.connect();
+    const listed = await this.allNamespaces(client);
+    // allNamespaces may yield a non-array when the response lacks a list.
+    const collectionNames = Array.isArray(listed) ? listed : [];
+    let totalVectors = 0;
+    for (const name of collectionNames) {
+      const collection = await client.collection(name).catch(() => null);
+      // The catch above maps failures to null; skip instead of dereferencing it.
+      if (!collection) continue;
+      const count = await collection.countDocuments().catch(() => 0);
+      totalVectors += count ? count : 0;
+    }
+    return totalVectors;
+  },
+  /**
+   * @param {string|null} _namespace - collection name to inspect.
+   * @returns {Promise<number>} the collection's vector count, or 0 when the
+   * namespace lookup returns nothing.
+   */
+  namespaceCount: async function (_namespace = null) {
+    const connection = await this.connect();
+    const details = await this.namespace(connection.client, _namespace);
+    return details?.vectorCount || 0;
+  },
+  /**
+   * Looks up a collection and returns its own properties plus `name` and
+   * `vectorCount`.
+   * @param {object} client - connected Astra client.
+   * @param {string|null} namespace - collection name.
+   * @returns {Promise<object|null>} null when the collection does not exist.
+   * @throws {Error} when no namespace is given.
+   */
+  namespace: async function (client, namespace = null) {
+    if (!namespace) throw new Error("No namespace value provided.");
+    const collection = await client.collection(namespace).catch(() => null);
+    const isReal = await this.isRealCollection(collection);
+    if (!isReal) return null;
+
+    let count = null;
+    try {
+      count = await collection.countDocuments();
+    } catch (e) {
+      console.error("Astra::namespaceExists", e.message);
+    }
+
+    const vectorCount = typeof count === "number" ? count : 0;
+    return { name: namespace, ...collection, vectorCount };
+  },
+  /**
+   * @param {string|null} namespace - collection name to check.
+   * @returns {Promise<boolean>} false for an empty name, otherwise whether the
+   * collection exists.
+   */
+  hasNamespace: async function (namespace = null) {
+    if (!namespace) return false;
+    const connection = await this.connect();
+    const exists = await this.namespaceExists(connection.client, namespace);
+    return exists;
+  },
+  /**
+   * @param {object} client - connected Astra client.
+   * @param {string|null} namespace - collection name to check.
+   * @returns {Promise<boolean>} result of probing the collection handle.
+   * @throws {Error} when no namespace is given.
+   */
+  namespaceExists: async function (client, namespace = null) {
+    if (!namespace) throw new Error("No namespace value provided.");
+    const candidate = await client.collection(namespace);
+    const exists = await this.isRealCollection(candidate);
+    return exists;
+  },
+  /**
+   * Removes every vector in a namespace by dropping its whole collection.
+   * @param {object} client - connected Astra client.
+   * @param {string|null} namespace - collection to drop.
+   * @returns {Promise<true>} resolves to true once the drop call completes.
+   */
+  deleteVectorsInNamespace: async function (client, namespace = null) {
+    const collectionName = namespace;
+    await client.dropCollection(collectionName);
+    return true;
+  },
+  // Creating an AstraDB collection needs the vector dimension up front, so the
+  // caller passes the dimension inferred from its first chunk, as the other
+  // providers do.
+  /**
+   * @param {object} client - connected Astra client.
+   * @param {string} namespace - collection name.
+   * @param {number|null} dimensions - vector size, needed only for creation.
+   * @returns {Promise<object>} handle to the existing or newly created collection.
+   * @throws {Error} when the collection is missing and no dimension is given.
+   */
+  getOrCreateCollection: async function (client, namespace, dimensions = null) {
+    const alreadyExists = await this.namespaceExists(client, namespace);
+    if (alreadyExists) return await client.collection(namespace);
+
+    if (!dimensions) {
+      throw new Error(
+        `AstraDB:getOrCreateCollection Unable to infer vector dimension from input. Open an issue on Github for support.`
+      );
+    }
+
+    const vector = { dimension: dimensions, metric: "cosine" };
+    await client.createCollection(namespace, { vector });
+    return await client.collection(namespace);
+  },
+  /**
+   * Stores a document's vectors in the collection named `namespace` and records
+   * each vector id in DocumentVectors. Vectors come from the on-disk cache when
+   * one exists for `fullFilePath`; otherwise the page content is split, embedded
+   * and then cached.
+   * @param {string} namespace - target collection (created on demand).
+   * @param {object} documentData - provides `pageContent` and `docId`; all other
+   * keys are stored as metadata on every vector.
+   * @param {string|null} fullFilePath - key for the vector cache lookup/store.
+   * @returns {Promise<false|{vectorized: boolean, error: string|null}>} `false`
+   * when there is no page content; otherwise the outcome, with any thrown
+   * error's message in `error`.
+   */
+  addDocumentToNamespace: async function (
+    namespace,
+    documentData = {},
+    fullFilePath = null
+  ) {
+    const { DocumentVectors } = require("../../../models/vectors");
+    try {
+      let vectorDimension = null;
+      const { pageContent, docId, ...metadata } = documentData;
+      if (!pageContent || pageContent.length === 0) return false;
+
+      console.log("Adding new vectorized document into namespace", namespace);
+      const cacheResult = await cachedVectorInformation(fullFilePath);
+      if (cacheResult.exists) {
+        const { client } = await this.connect();
+        const { chunks } = cacheResult;
+        const documentVectors = [];
+
+        // Guard the first cached vector so an empty or malformed cache reports
+        // a clear error instead of a TypeError from `chunks[0][0].values`.
+        const firstCached = chunks?.[0]?.[0];
+        if (!firstCached?.values)
+          throw new Error(
+            "AstraDB::addDocumentToNamespace cached vector file contains no vectors."
+          );
+        vectorDimension = firstCached.values.length || null;
+
+        const collection = await this.getOrCreateCollection(
+          client,
+          namespace,
+          vectorDimension
+        );
+        if (!(await this.isRealCollection(collection)))
+          throw new Error("Failed to create new AstraDB collection!", {
+            namespace,
+          });
+
+        for (const chunk of chunks) {
+          // Cached vectors get a fresh id before being sent to Astra, and the
+          // same id is recorded for our own database.
+          const newChunks = chunk.map((cachedVector) => {
+            const _id = uuidv4();
+            documentVectors.push({ docId, vectorId: _id });
+            return {
+              _id: _id,
+              $vector: cachedVector.values,
+              metadata: cachedVector.metadata || {},
+            };
+          });
+
+          await collection.insertMany(newChunks);
+        }
+        await DocumentVectors.bulkInsert(documentVectors);
+        return { vectorized: true, error: null };
+      }
+
+      const textSplitter = new RecursiveCharacterTextSplitter({
+        chunkSize:
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
+        chunkOverlap: 20,
+      });
+      const textChunks = await textSplitter.splitText(pageContent);
+
+      console.log("Chunks created from document:", textChunks.length);
+      const LLMConnector = getLLMProvider();
+      const documentVectors = [];
+      const vectors = [];
+      const vectorValues = await LLMConnector.embedChunks(textChunks);
+
+      if (!!vectorValues && vectorValues.length > 0) {
+        for (const [i, vector] of vectorValues.entries()) {
+          // The first embedding fixes the dimension used to create the collection.
+          if (!vectorDimension) vectorDimension = vector.length;
+          const vectorRecord = {
+            _id: uuidv4(),
+            $vector: vector,
+            metadata: { ...metadata, text: textChunks[i] },
+          };
+
+          vectors.push(vectorRecord);
+          documentVectors.push({ docId, vectorId: vectorRecord._id });
+        }
+      } else {
+        throw new Error(
+          "Could not embed document chunks! This document will not be recorded."
+        );
+      }
+      const { client } = await this.connect();
+      const collection = await this.getOrCreateCollection(
+        client,
+        namespace,
+        vectorDimension
+      );
+      if (!(await this.isRealCollection(collection)))
+        throw new Error("Failed to create new AstraDB collection!", {
+          namespace,
+        });
+
+      if (vectors.length > 0) {
+        const chunks = [];
+
+        console.log("Inserting vectorized chunks into Astra DB.");
+
+        // AstraDB has maximum upsert size of 20 records per-request so we have to use a lower chunk size here
+        // in order to do the queries - this takes a lot more time than other providers but there
+        // is no way around it. This will save the vector-cache with the same layout, so we don't
+        // have to chunk again for cached files.
+        for (const chunk of toChunks(vectors, 20)) {
+          chunks.push(
+            chunk.map((c) => {
+              return { id: c._id, values: c.$vector, metadata: c.metadata };
+            })
+          );
+          await collection.insertMany(chunk);
+        }
+        await storeVectorResult(chunks, fullFilePath);
+      }
+
+      await DocumentVectors.bulkInsert(documentVectors);
+      return { vectorized: true, error: null };
+    } catch (e) {
+      console.error("addDocumentToNamespace", e.message);
+      return { vectorized: false, error: e.message };
+    }
+  },
+  /**
+   * Deletes every vector recorded for `docId` from the namespace's collection,
+   * then removes the matching DocumentVectors rows.
+   * @param {string} namespace - collection holding the vectors.
+   * @param {string} docId - document whose vectors are removed.
+   * @returns {Promise<true|undefined>} undefined when no vectors are recorded
+   * for the document, true after a completed deletion.
+   * @throws {Error} when the namespace does not exist.
+   */
+  deleteDocumentFromNamespace: async function (namespace, docId) {
+    const { DocumentVectors } = require("../../../models/vectors");
+    const { client } = await this.connect();
+    const namespaceFound = await this.namespaceExists(client, namespace);
+    if (!namespaceFound) {
+      throw new Error(
+        "Invalid namespace - has it been collected and populated yet?"
+      );
+    }
+    const collection = await client.collection(namespace);
+
+    const knownDocuments = await DocumentVectors.where({ docId });
+    if (knownDocuments.length === 0) return;
+
+    // Vectors are removed one id at a time, in recorded order.
+    for (const { vectorId } of knownDocuments) {
+      await collection.deleteMany({ _id: vectorId });
+    }
+
+    const rowIds = knownDocuments.map(({ id }) => id);
+    await DocumentVectors.deleteIds(rowIds);
+    return true;
+  },
+  /**
+   * Embeds `input` with the given connector and returns the closest stored
+   * chunks from the namespace.
+   * @param {object} params
+   * @param {string|null} params.namespace - collection to search.
+   * @param {string} params.input - text to embed and search with.
+   * @param {object|null} params.LLMConnector - provides `embedTextInput`.
+   * @param {number} params.similarityThreshold - minimum similarity to keep.
+   * @param {number} params.topN - maximum number of matches requested.
+   * @returns {Promise<{contextTexts: string[], sources: object[], message: string|false}>}
+   * @throws {Error} when namespace, input or LLMConnector is missing.
+   */
+  performSimilaritySearch: async function ({
+    namespace = null,
+    input = "",
+    LLMConnector = null,
+    similarityThreshold = 0.25,
+    topN = 4,
+  }) {
+    const isValidRequest = namespace && input && LLMConnector;
+    if (!isValidRequest)
+      throw new Error("Invalid request to performSimilaritySearch.");
+
+    const { client } = await this.connect();
+    const namespaceFound = await this.namespaceExists(client, namespace);
+    if (!namespaceFound) {
+      return {
+        contextTexts: [],
+        sources: [],
+        message:
+          "Invalid query - no namespace found for workspace in vector db!",
+      };
+    }
+
+    const queryVector = await LLMConnector.embedTextInput(input);
+    const matches = await this.similarityResponse(
+      client,
+      namespace,
+      queryVector,
+      similarityThreshold,
+      topN
+    );
+    const { contextTexts, sourceDocuments } = matches;
+
+    // Pair each matched record with the text collected at the same index.
+    const sources = sourceDocuments.map((record, idx) => ({
+      ...record,
+      text: contextTexts[idx],
+    }));
+    return {
+      contextTexts,
+      sources: this.curateSources(sources),
+      message: false,
+    };
+  },
+  /**
+   * Runs a vector-sorted find against the namespace's collection and keeps the
+   * matches whose `$similarity` is not below the threshold.
+   * @param {object} client - connected Astra client.
+   * @param {string} namespace - collection to query.
+   * @param {number[]} queryVector - embedding used as the sort vector.
+   * @param {number} similarityThreshold - minimum similarity to keep.
+   * @param {number} topN - query limit.
+   * @returns {Promise<{contextTexts: string[], sourceDocuments: object[], scores: number[]}>}
+   * parallel arrays: metadata text, full response record and similarity.
+   */
+  similarityResponse: async function (
+    client,
+    namespace,
+    queryVector,
+    similarityThreshold = 0.25,
+    topN = 4
+  ) {
+    const contextTexts = [];
+    const sourceDocuments = [];
+    const scores = [];
+
+    const collection = await client.collection(namespace);
+    const findOptions = {
+      sort: { $vector: queryVector },
+      limit: topN,
+      includeSimilarity: true,
+    };
+    const responses = await collection.find({}, findOptions).toArray();
+
+    for (const response of responses) {
+      if (response.$similarity < similarityThreshold) continue;
+      contextTexts.push(response.metadata.text);
+      sourceDocuments.push(response);
+      scores.push(response.$similarity);
+    }
+    return { contextTexts, sourceDocuments, scores };
+  },
+  /**
+   * Lists collections by POSTing a `findCollections` command to the client's
+   * base URL, authenticated with the client's application token.
+   * @param {object} client - Astra client; `httpClient.baseUrl` and
+   * `httpClient.applicationToken` are read from it.
+   * @returns {Promise<Array>} the `status.collections` list from the response.
+   * Always an array: empty when the request fails, the body is empty, or the
+   * response carries no such list.
+   */
+  allNamespaces: async function (client) {
+    try {
+      const headers = new Headers();
+      headers.append("Token", client?.httpClient?.applicationToken);
+      headers.append("Content-Type", "application/json");
+
+      const response = await fetch(client?.httpClient?.baseUrl, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({ findCollections: {} }),
+        redirect: "follow",
+      });
+      const body = await response?.text();
+      const collections = body ? JSON.parse(body)?.status?.collections : [];
+      // Error payloads have no `status.collections`; callers iterate the result,
+      // so never hand back undefined.
+      return Array.isArray(collections) ? collections : [];
+    } catch (e) {
+      console.error("Astra::AllNamespace", e);
+      return [];
+    }
+  },
+  /**
+   * @param {{namespace?: string}} reqBody - request body naming the namespace.
+   * @returns {Promise<object>} the namespace details, or an object with a
+   * `message` when none could be fetched.
+   * @throws {Error} when the namespace is missing or does not exist.
+   */
+  "namespace-stats": async function (reqBody = {}) {
+    const { namespace = null } = reqBody;
+    if (!namespace) throw new Error("namespace required");
+
+    const { client } = await this.connect();
+    const exists = await this.namespaceExists(client, namespace);
+    if (!exists) throw new Error("Namespace by that name does not exist.");
+
+    const stats = await this.namespace(client, namespace);
+    if (stats) return stats;
+    return {
+      message: "No stats were able to be fetched from DB for namespace",
+    };
+  },
+  /**
+   * Drops the namespace's collection and reports how many vectors it held.
+   * @param {{namespace?: string}} reqBody - request body naming the namespace.
+   * @returns {Promise<{message: string}>} confirmation message.
+   * @throws {Error} when the namespace does not exist.
+   */
+  "delete-namespace": async function (reqBody = {}) {
+    const { namespace = null } = reqBody;
+    const { client } = await this.connect();
+    const exists = await this.namespaceExists(client, namespace);
+    if (!exists) throw new Error("Namespace by that name does not exist.");
+
+    // Read the count before dropping; it cannot be fetched afterwards.
+    const details = await this.namespace(client, namespace);
+    await this.deleteVectorsInNamespace(client, namespace);
+
+    const removed = details?.vectorCount || "all";
+    return {
+      message: `Namespace ${namespace} was deleted along with ${removed} vectors.`,
+    };
+  },
+  /**
+   * Flattens search results into plain metadata objects.
+   * @param {object[]} sources - result records; each either has its data under
+   * an own `metadata` key or is itself the metadata.
+   * @returns {object[]} shallow copies of each record's metadata; null or empty
+   * records are dropped.
+   */
+  curateSources: function (sources = []) {
+    const documents = [];
+    for (const source of sources) {
+      // Object.keys throws on null/undefined, so skip those entries first.
+      if (!source || Object.keys(source).length === 0) continue;
+
+      // Call through the prototype so records without Object.prototype, or with
+      // their own `hasOwnProperty` key, are still handled.
+      const hasMetadata = Object.prototype.hasOwnProperty.call(
+        source,
+        "metadata"
+      );
+      documents.push({ ...(hasMetadata ? source.metadata : source) });
+    }
+
+    return documents;
+  },
+};
+
+module.exports.AstraDB = AstraDB;
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@ -207,9 +207,9 @@ const LanceDb = {

          vectors.push(vectorRecord);
          submissions.push({
+            ...vectorRecord.metadata,
            id: vectorRecord.id,
            vector: vectorRecord.values,
-            ...vectorRecord.metadata,
          });
          documentVectors.push({ docId, vectorId: vectorRecord.id });
        }
--- a/server/yarn.lock
+++ b/server/yarn.lock
@ -174,6 +174,15 @@
    enabled "2.0.x"
    kuler "^2.0.0"

+"@datastax/astra-db-ts@^0.1.3":
+  version "0.1.3"
+  resolved "https://registry.yarnpkg.com/@datastax/astra-db-ts/-/astra-db-ts-0.1.3.tgz#fcc25cda8d146c06278860054f09d687ff031568"
+  integrity sha512-7lnpym0HhUtfJVd8+vu6vYdDQpFyYof7TVLFVD2fgoIjUwj3EksFXmqDqicLAlLferZDllqSVthX9pXQ5Rdapw==
+  dependencies:
+    axios "^1.4.0"
+    bson "^6.2.0"
+    winston "^3.7.2"
+
 "@eslint-community/eslint-utils@^4.2.0":
  version "4.4.0"
  resolved "https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz#a23514e8fb9af1269d5f7788aa556798d61c6b59"
@ -1353,6 +1362,11 @@ braces@~3.0.2:
  dependencies:
    fill-range "^7.0.1"

+bson@^6.2.0:
+  version "6.2.0"
+  resolved "https://registry.yarnpkg.com/bson/-/bson-6.2.0.tgz#4b6acafc266ba18eeee111373c2699304a9ba0a3"
+  integrity sha512-ID1cI+7bazPDyL9wYy9GaQ8gEEohWvcUl/Yf0dIdutJxnmInEEyCsb4awy/OiBfall7zBA179Pahi3vCdFze3Q==
+
 btoa-lite@^1.0.0:
  version "1.0.0"
  resolved "https://registry.yarnpkg.com/btoa-lite/-/btoa-lite-1.0.0.tgz#337766da15801210fdd956c22e9c6891ab9d0337"
@ -5636,7 +5650,7 @@ winston-transport@^4.5.0:
    readable-stream "^3.6.0"
    triple-beam "^1.3.0"

-winston@^3.9.0:
+winston@^3.7.2, winston@^3.9.0:
  version "3.11.0"
  resolved "https://registry.yarnpkg.com/winston/-/winston-3.11.0.tgz#2d50b0a695a2758bb1c95279f0a88e858163ed91"
  integrity sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==