diff --git a/frontend/src/media/vectordbs/weaviate.png b/frontend/src/media/vectordbs/weaviate.png
new file mode 100644
index 00000000..d7980bf6
Binary files /dev/null and b/frontend/src/media/vectordbs/weaviate.png differ
diff --git a/server/.env.example b/server/.env.example
index e06c0f7e..606dd898 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -31,6 +31,12 @@ PINECONE_INDEX=
# Enable all below if you are using vector database: LanceDB.
# VECTOR_DB="lancedb"
+# Enable all below if you are using vector database: Weaviate.
+# VECTOR_DB="weaviate"
+# WEAVIATE_ENDPOINT="http://localhost:8080"
+# WEAVIATE_API_KEY=
+
+
# CLOUD DEPLOYMENT VARIRABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# STORAGE_DIR= # absolute filesystem path with no trailing slash
diff --git a/server/endpoints/system.js b/server/endpoints/system.js
index 98354f4a..01a367c7 100644
--- a/server/endpoints/system.js
+++ b/server/endpoints/system.js
@@ -60,6 +60,12 @@ function systemEndpoints(app) {
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
}
: {}),
+ ...(vectorDB === "weaviate"
+ ? {
+ WeaviateEndpoint: process.env.WEAVIATE_ENDPOINT,
+ WeaviateApiKey: process.env.WEAVIATE_API_KEY,
+ }
+ : {}),
LLMProvider: llmProvider,
...(llmProvider === "openai"
? {
diff --git a/server/package.json b/server/package.json
index 3d8ec2b8..e35eb69d 100644
--- a/server/package.json
+++ b/server/package.json
@@ -26,6 +26,7 @@
"dotenv": "^16.0.3",
"express": "^4.18.2",
"extract-zip": "^2.0.1",
+ "graphql": "^16.7.1",
"jsonwebtoken": "^8.5.1",
"langchain": "^0.0.90",
"moment": "^2.29.4",
@@ -38,7 +39,8 @@
"sqlite3": "^5.1.6",
"uuid": "^9.0.0",
"uuid-apikey": "^1.5.3",
- "vectordb": "0.1.12"
+ "vectordb": "0.1.12",
+ "weaviate-ts-client": "^1.4.0"
},
"devDependencies": {
"nodemon": "^2.0.22",
diff --git a/server/utils/helpers/camelcase.js b/server/utils/helpers/camelcase.js
new file mode 100644
index 00000000..4a8e1b28
--- /dev/null
+++ b/server/utils/helpers/camelcase.js
@@ -0,0 +1,143 @@
+const UPPERCASE = /[\p{Lu}]/u;
+const LOWERCASE = /[\p{Ll}]/u;
+const LEADING_CAPITAL = /^[\p{Lu}](?![\p{Lu}])/gu;
+const IDENTIFIER = /([\p{Alpha}\p{N}_]|$)/u;
+const SEPARATORS = /[_.\- ]+/;
+
+const LEADING_SEPARATORS = new RegExp("^" + SEPARATORS.source);
+const SEPARATORS_AND_IDENTIFIER = new RegExp(
+ SEPARATORS.source + IDENTIFIER.source,
+ "gu"
+);
+const NUMBERS_AND_IDENTIFIER = new RegExp("\\d+" + IDENTIFIER.source, "gu");
+
+const preserveCamelCase = (
+ string,
+ toLowerCase,
+ toUpperCase,
+ preserveConsecutiveUppercase
+) => {
+ let isLastCharLower = false;
+ let isLastCharUpper = false;
+ let isLastLastCharUpper = false;
+ let isLastLastCharPreserved = false;
+
+ for (let index = 0; index < string.length; index++) {
+ const character = string[index];
+ isLastLastCharPreserved = index > 2 ? string[index - 3] === "-" : true;
+
+ if (isLastCharLower && UPPERCASE.test(character)) {
+ string = string.slice(0, index) + "-" + string.slice(index);
+ isLastCharLower = false;
+ isLastLastCharUpper = isLastCharUpper;
+ isLastCharUpper = true;
+ index++;
+ } else if (
+ isLastCharUpper &&
+ isLastLastCharUpper &&
+ LOWERCASE.test(character) &&
+ (!isLastLastCharPreserved || preserveConsecutiveUppercase)
+ ) {
+ string = string.slice(0, index - 1) + "-" + string.slice(index - 1);
+ isLastLastCharUpper = isLastCharUpper;
+ isLastCharUpper = false;
+ isLastCharLower = true;
+ } else {
+ isLastCharLower =
+ toLowerCase(character) === character &&
+ toUpperCase(character) !== character;
+ isLastLastCharUpper = isLastCharUpper;
+ isLastCharUpper =
+ toUpperCase(character) === character &&
+ toLowerCase(character) !== character;
+ }
+ }
+
+ return string;
+};
+
+const preserveConsecutiveUppercase = (input, toLowerCase) => {
+ LEADING_CAPITAL.lastIndex = 0;
+
+ return input.replace(LEADING_CAPITAL, (m1) => toLowerCase(m1));
+};
+
+const postProcess = (input, toUpperCase) => {
+ SEPARATORS_AND_IDENTIFIER.lastIndex = 0;
+ NUMBERS_AND_IDENTIFIER.lastIndex = 0;
+
+ return input
+ .replace(SEPARATORS_AND_IDENTIFIER, (_, identifier) =>
+ toUpperCase(identifier)
+ )
+ .replace(NUMBERS_AND_IDENTIFIER, (m) => toUpperCase(m));
+};
+
+function camelCase(input, options) {
+ if (!(typeof input === "string" || Array.isArray(input))) {
+ throw new TypeError("Expected the input to be `string | string[]`");
+ }
+
+ options = {
+ pascalCase: true,
+ preserveConsecutiveUppercase: false,
+ ...options,
+ };
+
+ if (Array.isArray(input)) {
+ input = input
+ .map((x) => x.trim())
+ .filter((x) => x.length)
+ .join("-");
+ } else {
+ input = input.trim();
+ }
+
+ if (input.length === 0) {
+ return "";
+ }
+
+ const toLowerCase =
+ options.locale === false
+ ? (string) => string.toLowerCase()
+ : (string) => string.toLocaleLowerCase(options.locale);
+
+ const toUpperCase =
+ options.locale === false
+ ? (string) => string.toUpperCase()
+ : (string) => string.toLocaleUpperCase(options.locale);
+
+ if (input.length === 1) {
+ if (SEPARATORS.test(input)) {
+ return "";
+ }
+
+ return options.pascalCase ? toUpperCase(input) : toLowerCase(input);
+ }
+
+ const hasUpperCase = input !== toLowerCase(input);
+
+ if (hasUpperCase) {
+ input = preserveCamelCase(
+ input,
+ toLowerCase,
+ toUpperCase,
+ options.preserveConsecutiveUppercase
+ );
+ }
+
+ input = input.replace(LEADING_SEPARATORS, "");
+ input = options.preserveConsecutiveUppercase
+ ? preserveConsecutiveUppercase(input, toLowerCase)
+ : toLowerCase(input);
+
+ if (options.pascalCase) {
+ input = toUpperCase(input.charAt(0)) + input.slice(1);
+ }
+
+ return postProcess(input, toUpperCase);
+}
+
+module.exports = {
+ camelCase,
+};
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 5be56507..b7fb5ae0 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -10,6 +10,9 @@ function getVectorDbClass() {
case "lancedb":
const { LanceDb } = require("../vectorDbProviders/lance");
return LanceDb;
+ case "weaviate":
+ const { Weaviate } = require("../vectorDbProviders/weaviate");
+ return Weaviate;
default:
throw new Error("ENV: No VECTOR_DB value found in environment!");
}
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 64c91988..9f00ec42 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -39,6 +39,15 @@ const KEY_MAPPING = {
envKey: "CHROMA_ENDPOINT",
checks: [isValidURL, validChromaURL],
},
+ WeaviateEndpoint: {
+ envKey: "WEAVIATE_ENDPOINT",
+ checks: [isValidURL],
+ },
+ WeaviateApiKey: {
+ envKey: "WEAVIATE_API_KEY",
+ checks: [],
+ },
+
PineConeEnvironment: {
envKey: "PINECONE_ENVIRONMENT",
checks: [],
@@ -103,7 +112,7 @@ function validOpenAIModel(input = "") {
}
function supportedVectorDB(input = "") {
- const supported = ["chroma", "pinecone", "lancedb"];
+ const supported = ["chroma", "pinecone", "lancedb", "weaviate"];
return supported.includes(input)
? null
: `Invalid VectorDB type. Must be one of ${supported.join(", ")}.`;
diff --git a/server/utils/vectorDbProviders/weaviate/WEAVIATE_SETUP.md b/server/utils/vectorDbProviders/weaviate/WEAVIATE_SETUP.md
new file mode 100644
index 00000000..fc0acaec
--- /dev/null
+++ b/server/utils/vectorDbProviders/weaviate/WEAVIATE_SETUP.md
@@ -0,0 +1,17 @@
+# How to set up a local (or cloud) Weaviate Vector Database
+
+[Get a Weaviate Cloud instance](https://weaviate.io/developers/weaviate/quickstart#create-an-instance).
+[Set up Weaviate locally on Docker](https://weaviate.io/developers/weaviate/installation/docker-compose).
+
+Fill out the variables in the "Vector Database" tab of settings. Select Weaviate as your provider and fill out the appropriate fields
+with the information from either of the above steps.
+
+### How to get started _Development mode only_
+
+After setting up either the Weaviate cloud or local dockerized instance, you just need to set these variables in `.env.development` or define them at runtime via the UI.
+
+```
+VECTOR_DB="weaviate"
+WEAVIATE_ENDPOINT='http://localhost:8080'
+WEAVIATE_API_KEY= # Optional
+```
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
new file mode 100644
index 00000000..884c08e0
--- /dev/null
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -0,0 +1,503 @@
+const { default: weaviate } = require("weaviate-ts-client");
+const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { storeVectorResult, cachedVectorInformation } = require("../../files");
+const { v4: uuidv4 } = require("uuid");
+const { toChunks, getLLMProvider } = require("../../helpers");
+const { chatPrompt } = require("../../chats");
+const { camelCase } = require("../../helpers/camelcase");
+
+const Weaviate = {
+ name: "Weaviate",
+  // Builds a Weaviate client from WEAVIATE_ENDPOINT / WEAVIATE_API_KEY; throws unless the liveness check passes.
+  connect: async function () {
+    if (process.env.VECTOR_DB !== "weaviate")
+      throw new Error("Weaviate::Invalid ENV settings");
+    const weaviateUrl = new URL(process.env.WEAVIATE_ENDPOINT);
+    const options = {
+      scheme: weaviateUrl.protocol.replace(":", "") || "http",
+      host: weaviateUrl.host,
+      ...(process.env.WEAVIATE_API_KEY?.length > 0
+        ? { apiKey: new weaviate.ApiKey(process.env.WEAVIATE_API_KEY) }
+        : {}),
+    };
+    const client = weaviate.client(options);
+    const isAlive = await client.misc.liveChecker().do();
+    if (!isAlive)
+      throw new Error(
+        "Weaviate::Invalid Alive signal received - is the service online?"
+      );
+    return { client };
+  },
+ heartbeat: async function () {
+ await this.connect();
+ return { heartbeat: Number(new Date()) };
+ },
+ totalIndicies: async function () {
+ const { client } = await this.connect();
+ const collectionNames = await this.allNamespaces(client);
+ var totalVectors = 0;
+ for (const name of collectionNames) {
+ totalVectors += await this.namespaceCountWithClient(client, name);
+ }
+ return totalVectors;
+ },
+ namespaceCountWithClient: async function (client, namespace) {
+ try {
+ const response = await client.graphql
+ .aggregate()
+ .withClassName(camelCase(namespace))
+ .withFields("meta { count }")
+ .do();
+ return (
+ response?.data?.Aggregate?.[camelCase(namespace)]?.[0]?.meta?.count || 0
+ );
+ } catch (e) {
+ console.error(`Weaviate:namespaceCountWithClient`, e.message);
+ return 0;
+ }
+ },
+  // Counts the objects in the class backing `namespace`; logs and resolves to 0 on any failure.
+  namespaceCount: async function (namespace = null) {
+    try {
+      const { client } = await this.connect();
+      const response = await client.graphql
+        .aggregate()
+        .withClassName(camelCase(namespace))
+        .withFields("meta { count }")
+        .do();
+      return (
+        response?.data?.Aggregate?.[camelCase(namespace)]?.[0]?.meta?.count || 0
+      );
+    } catch (e) {
+      console.error(`Weaviate:namespaceCount`, e.message);
+      return 0;
+    }
+  },
+  // Vector search (limit 4) in the class backing `namespace`; returns each hit's `text` and its properties plus id.
+  similarityResponse: async function (client, namespace, queryVector) {
+    const result = {
+      contextTexts: [],
+      sourceDocuments: [],
+    };
+    const weaviateClass = await this.namespace(client, namespace);
+    // Tolerate a missing class or a class without a `properties` list instead of throwing a TypeError.
+    const fields = (weaviateClass?.properties ?? []).map((prop) => prop.name).join(" ");
+    const queryResponse = await client.graphql
+      .get()
+      .withClassName(camelCase(namespace))
+      .withFields(`${fields} _additional { id }`)
+      .withNearVector({ vector: queryVector })
+      .withLimit(4)
+      .do();
+    // The class key may be absent from the response; treat that as "no matches".
+    const responses = queryResponse?.data?.Get?.[camelCase(namespace)] ?? [];
+    responses.forEach((response) => {
+      // In Weaviate we have to pluck id from _additional and spread it into the rest
+      // of the properties.
+      const {
+        _additional: { id },
+        ...rest
+      } = response;
+      result.contextTexts.push(rest.text);
+      result.sourceDocuments.push({ ...rest, id });
+    });
+    return result;
+  },
+ allNamespaces: async function (client) {
+ try {
+ const { classes = [] } = await client.schema.getter().do();
+ return classes.map((classObj) => classObj.class);
+ } catch (e) {
+ console.error("Weaviate::AllNamespace", e);
+ return [];
+ }
+ },
+ namespace: async function (client, namespace = null) {
+ if (!namespace) throw new Error("No namespace value provided.");
+ if (!(await this.namespaceExists(client, namespace))) return null;
+
+ const weaviateClass = await client.schema
+ .classGetter()
+ .withClassName(camelCase(namespace))
+ .do();
+
+ return {
+ ...weaviateClass,
+ vectorCount: await this.namespaceCount(namespace),
+ };
+ },
+ addVectors: async function (client, vectors = []) {
+ const response = { success: true, errors: new Set([]) };
+ const results = await client.batch
+ .objectsBatcher()
+ .withObjects(...vectors)
+ .do();
+
+ results.forEach((res) => {
+ const { status, errors = [] } = res.result;
+ if (status === "SUCCESS" || errors.length === 0) return;
+ response.success = false;
+ response.errors.add(errors.error?.[0]?.message || null);
+ });
+
+ response.errors = [...response.errors];
+ return response;
+ },
+ hasNamespace: async function (namespace = null) {
+ if (!namespace) return false;
+ const { client } = await this.connect();
+ const weaviateClasses = await this.allNamespaces(client);
+ return weaviateClasses.includes(camelCase(namespace));
+ },
+ namespaceExists: async function (client, namespace = null) {
+ if (!namespace) throw new Error("No namespace value provided.");
+ const weaviateClasses = await this.allNamespaces(client);
+ return weaviateClasses.includes(camelCase(namespace));
+ },
+ deleteVectorsInNamespace: async function (client, namespace = null) {
+ await client.schema.classDeleter().withClassName(camelCase(namespace)).do();
+ return true;
+ },
+  // Embeds a document into the Weaviate class backing `namespace` and records its
+  // vector ids through DocumentVectors.bulkInsert.
+  // - namespace: workspace namespace; the class name is camelCase(namespace).
+  // - documentData: `pageContent`, `docId` and metadata; `id` is stripped before upload.
+  // - fullFilePath: passed to cachedVectorInformation / storeVectorResult as the cache key.
+  // If cached vectors exist for `fullFilePath` they are re-inserted under fresh ids,
+  // otherwise the text is split, embedded via the LLM provider, inserted and cached.
+  // Resolves to true on success and false when the content is empty or any step throws.
+  addDocumentToNamespace: async function (
+    namespace,
+    documentData = {},
+    fullFilePath = null
+  ) {
+    const { DocumentVectors } = require("../../../models/vectors");
+    try {
+      const {
+        pageContent,
+        docId,
+        id: _id, // Weaviate will abort if `id` is present in properties
+        ...metadata
+      } = documentData;
+      if (!pageContent || pageContent.length === 0) return false;
+
+      console.log("Adding new vectorized document into namespace", namespace);
+      const cacheResult = await cachedVectorInformation(fullFilePath);
+      if (cacheResult.exists) {
+        const { client } = await this.connect();
+        const weaviateClassExits = await this.hasNamespace(namespace);
+        if (!weaviateClassExits) {
+          await client.schema
+            .classCreator()
+            .withClass({
+              class: camelCase(namespace),
+              description: `Class created by AnythingLLM named ${camelCase(
+                namespace
+              )}`,
+              vectorizer: "none",
+            })
+            .do();
+        }
+
+        const { chunks } = cacheResult;
+        const documentVectors = [];
+
+        for (const chunk of chunks) {
+          // Collect vectors per cached chunk so earlier chunks are not re-sent on later iterations.
+          const vectors = [];
+          // Before sending to Weaviate and saving the records to our db
+          // we need to assign the id of each chunk that is stored in the cached file.
+          chunk.forEach((cachedRecord) => {
+            const id = uuidv4();
+            const flattenedMetadata = this.flattenObjectForWeaviate(
+              cachedRecord.properties
+            );
+            documentVectors.push({ docId, vectorId: id });
+            const vectorRecord = {
+              id,
+              class: camelCase(namespace),
+              vector: cachedRecord.vector || cachedRecord.values || [],
+              properties: { ...flattenedMetadata },
+            };
+            vectors.push(vectorRecord);
+          });
+
+          const { success: additionResult, errors = [] } =
+            await this.addVectors(client, vectors);
+          if (!additionResult) {
+            console.error("Weaviate::addVectors failed to insert", errors);
+            throw new Error("Error embedding into Weaviate");
+          }
+        }
+
+        await DocumentVectors.bulkInsert(documentVectors);
+        return true;
+      }
+
+      // If we are here then we are going to embed and store a novel document.
+      // We have to do this manually as opposed to using LangChains `Chroma.fromDocuments`
+      // because we then cannot atomically control our namespace to granularly find/remove documents
+      // from vectordb.
+      const textSplitter = new RecursiveCharacterTextSplitter({
+        chunkSize: 1000,
+        chunkOverlap: 20,
+      });
+      const textChunks = await textSplitter.splitText(pageContent);
+
+      console.log("Chunks created from document:", textChunks.length);
+      const LLMConnector = getLLMProvider();
+      const documentVectors = [];
+      const vectors = [];
+      const vectorValues = await LLMConnector.embedChunks(textChunks);
+
+      if (!!vectorValues && vectorValues.length > 0) {
+        for (const [i, vector] of vectorValues.entries()) {
+          const flattenedMetadata = this.flattenObjectForWeaviate(metadata);
+          const vectorRecord = {
+            class: camelCase(namespace),
+            id: uuidv4(),
+            vector: vector,
+            // [DO NOT REMOVE]
+            // LangChain will be unable to find your text if you embed manually and dont include the `text` key.
+            // https://github.com/hwchase17/langchainjs/blob/5485c4af50c063e257ad54f4393fa79e0aff6462/langchain/src/vectorstores/weaviate.ts#L133
+            properties: { ...flattenedMetadata, text: textChunks[i] },
+          };
+
+          vectors.push(vectorRecord);
+          documentVectors.push({ docId, vectorId: vectorRecord.id });
+        }
+      } else {
+        console.error(
+          "Could not use OpenAI to embed document chunks! This document will not be recorded."
+        );
+      }
+
+      const { client } = await this.connect();
+      const weaviateClassExits = await this.hasNamespace(namespace);
+      if (!weaviateClassExits) {
+        await client.schema
+          .classCreator()
+          .withClass({
+            class: camelCase(namespace),
+            description: `Class created by AnythingLLM named ${camelCase(
+              namespace
+            )}`,
+            vectorizer: "none",
+          })
+          .do();
+      }
+
+      if (vectors.length > 0) {
+        const chunks = [];
+        for (const chunk of toChunks(vectors, 500)) chunks.push(chunk);
+
+        console.log("Inserting vectorized chunks into Weaviate collection.");
+        const { success: additionResult, errors = [] } = await this.addVectors(
+          client,
+          vectors
+        );
+        if (!additionResult) {
+          console.error("Weaviate::addVectors failed to insert", errors);
+          throw new Error("Error embedding into Weaviate");
+        }
+        await storeVectorResult(chunks, fullFilePath);
+      }
+
+      await DocumentVectors.bulkInsert(documentVectors);
+      return true;
+    } catch (e) {
+      console.error(e);
+      console.error("addDocumentToNamespace", e.message);
+      return false;
+    }
+  },
+ deleteDocumentFromNamespace: async function (namespace, docId) {
+ const { DocumentVectors } = require("../../../models/vectors");
+ const { client } = await this.connect();
+ if (!(await this.namespaceExists(client, namespace))) return;
+
+ const knownDocuments = await DocumentVectors.where(`docId = '${docId}'`);
+ if (knownDocuments.length === 0) return;
+
+ for (const doc of knownDocuments) {
+ await client.data
+ .deleter()
+ .withClassName(camelCase(namespace))
+ .withId(doc.vectorId)
+ .do();
+ }
+
+ const indexes = knownDocuments.map((doc) => doc.id);
+ await DocumentVectors.deleteIds(indexes);
+ return true;
+ },
+ query: async function (reqBody = {}) {
+ const { namespace = null, input, workspace = {} } = reqBody;
+ if (!namespace || !input) throw new Error("Invalid request body");
+
+ const { client } = await this.connect();
+ if (!(await this.namespaceExists(client, namespace))) {
+ return {
+ response: null,
+ sources: [],
+ message: "Invalid query - no documents found for workspace!",
+ };
+ }
+
+ const LLMConnector = getLLMProvider();
+ const queryVector = await LLMConnector.embedTextInput(input);
+ const { contextTexts, sourceDocuments } = await this.similarityResponse(
+ client,
+ namespace,
+ queryVector
+ );
+
+ const prompt = {
+ role: "system",
+ content: `${chatPrompt(workspace)}
+ Context:
+ ${contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")}`,
+ };
+ const memory = [prompt, { role: "user", content: input }];
+ const responseText = await LLMConnector.getChatCompletion(memory, {
+ temperature: workspace?.openAiTemp ?? 0.7,
+ });
+
+ return {
+ response: responseText,
+ sources: this.curateSources(sourceDocuments),
+ message: false,
+ };
+ },
+ // This implementation of chat uses the chat history and modifies the system prompt at execution
+ // this is improved over the regular langchain implementation so that chats do not directly modify embeddings
+ // because then multi-user support will have all conversations mutating the base vector collection to which then
+ // the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
+ chat: async function (reqBody = {}) {
+ const {
+ namespace = null,
+ input,
+ workspace = {},
+ chatHistory = [],
+ } = reqBody;
+ if (!namespace || !input) throw new Error("Invalid request body");
+
+ const { client } = await this.connect();
+ if (!(await this.namespaceExists(client, namespace))) {
+ return {
+ response: null,
+ sources: [],
+ message: "Invalid query - no documents found for workspace!",
+ };
+ }
+
+ const LLMConnector = getLLMProvider();
+ const queryVector = await LLMConnector.embedTextInput(input);
+ const { contextTexts, sourceDocuments } = await this.similarityResponse(
+ client,
+ namespace,
+ queryVector
+ );
+ const prompt = {
+ role: "system",
+ content: `${chatPrompt(workspace)}
+ Context:
+ ${contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")}`,
+ };
+ const memory = [prompt, ...chatHistory, { role: "user", content: input }];
+ const responseText = await LLMConnector.getChatCompletion(memory, {
+ temperature: workspace?.openAiTemp ?? 0.7,
+ });
+
+ return {
+ response: responseText,
+ sources: this.curateSources(sourceDocuments),
+ message: false,
+ };
+ },
+ "namespace-stats": async function (reqBody = {}) {
+ const { namespace = null } = reqBody;
+ if (!namespace) throw new Error("namespace required");
+ const { client } = await this.connect();
+ const stats = await this.namespace(client, namespace);
+ return stats
+ ? stats
+ : { message: "No stats were able to be fetched from DB for namespace" };
+ },
+ "delete-namespace": async function (reqBody = {}) {
+ const { namespace = null } = reqBody;
+ const { client } = await this.connect();
+ const details = await this.namespace(client, namespace);
+ await this.deleteVectorsInNamespace(client, namespace);
+ return {
+ message: `Namespace ${camelCase(namespace)} was deleted along with ${details?.vectorCount
+ } vectors.`,
+ };
+ },
+ reset: async function () {
+ const { client } = await this.connect();
+ const weaviateClasses = await this.allNamespaces(client);
+ for (const weaviateClass of weaviateClasses) {
+ await client.schema.classDeleter().withClassName(weaviateClass).do();
+ }
+ return { reset: true };
+ },
+ curateSources: function (sources = []) {
+ const documents = [];
+ for (const source of sources) {
+ if (Object.keys(source).length > 0) {
+ documents.push(source);
+ }
+ }
+
+ return documents;
+ },
+  flattenObjectForWeaviate: function (obj = {}) {
+    // Note this function is not generic, it is designed specifically for Weaviate
+    // https://weaviate.io/developers/weaviate/config-refs/datatypes#introduction
+    // Credit to LangchainJS
+    // https://github.com/hwchase17/langchainjs/blob/5485c4af50c063e257ad54f4393fa79e0aff6462/langchain/src/vectorstores/weaviate.ts#L11C1-L50C3
+    const flattenedObject = {};
+
+    for (const key in obj) {
+      if (!Object.hasOwn(obj, key)) {
+        continue;
+      }
+      const value = obj[key];
+      // Nested objects become `parent_child` keys; `null` is also typeof "object" and yields no keys.
+      if (typeof value === "object" && !Array.isArray(value)) {
+        const recursiveResult = this.flattenObjectForWeaviate(value);
+
+        for (const deepKey in recursiveResult) {
+          if (Object.hasOwn(recursiveResult, deepKey)) {
+            flattenedObject[`${key}_${deepKey}`] = recursiveResult[deepKey];
+          }
+        }
+      } else if (Array.isArray(value)) {
+        if (
+          value.length > 0 &&
+          typeof value[0] !== "object" &&
+          value.every((el) => typeof el === typeof value[0])
+        ) {
+          // Weaviate only supports arrays of primitive types,
+          // where all elements are of the same type
+          flattenedObject[key] = value;
+        }
+      } else {
+        flattenedObject[key] = value;
+      }
+    }
+
+    return flattenedObject;
+  },
+};
+
+module.exports.Weaviate = Weaviate;
diff --git a/server/yarn.lock b/server/yarn.lock
index cd1514e7..3b1caaa8 100644
--- a/server/yarn.lock
+++ b/server/yarn.lock
@@ -130,6 +130,11 @@
dependencies:
googleapis-common "^6.0.3"
+"@graphql-typed-document-node/core@^3.1.1":
+ version "3.2.0"
+ resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.2.0.tgz#5f3d96ec6b2354ad6d8a28bf216a1d97b5426861"
+ integrity sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==
+
"@mapbox/node-pre-gyp@^1.0.0", "@mapbox/node-pre-gyp@^1.0.10":
version "1.0.11"
resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz#417db42b7f5323d79e93b34a6d7a2a12c0df43fa"
@@ -916,6 +921,11 @@ extend@^3.0.2:
resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
+extract-files@^9.0.0:
+ version "9.0.0"
+ resolved "https://registry.yarnpkg.com/extract-files/-/extract-files-9.0.0.tgz#8a7744f2437f81f5ed3250ed9f1550de902fe54a"
+ integrity sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ==
+
extract-zip@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
@@ -981,6 +991,15 @@ follow-redirects@^1.14.8:
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.2.tgz#b460864144ba63f2681096f274c4e57026da2c13"
integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==
+form-data@^3.0.0:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f"
+ integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==
+ dependencies:
+ asynckit "^0.4.0"
+ combined-stream "^1.0.8"
+ mime-types "^2.1.12"
+
form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
@@ -1149,6 +1168,21 @@ graceful-fs@^4.2.0, graceful-fs@^4.2.6:
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3"
integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==
+graphql-request@^5.1.0:
+ version "5.2.0"
+ resolved "https://registry.yarnpkg.com/graphql-request/-/graphql-request-5.2.0.tgz#a05fb54a517d91bb2d7aefa17ade4523dc5ebdca"
+ integrity sha512-pLhKIvnMyBERL0dtFI3medKqWOz/RhHdcgbZ+hMMIb32mEPa5MJSzS4AuXxfI4sRAu6JVVk5tvXuGfCWl9JYWQ==
+ dependencies:
+ "@graphql-typed-document-node/core" "^3.1.1"
+ cross-fetch "^3.1.5"
+ extract-files "^9.0.0"
+ form-data "^3.0.0"
+
+graphql@^16.7.1:
+ version "16.7.1"
+ resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.7.1.tgz#11475b74a7bff2aefd4691df52a0eca0abd9b642"
+ integrity sha512-DRYR9tf+UGU0KOsMcKAlXeFfX89UiiIZ0dRU3mR0yJfu6OjZqUcp68NnFLnqQU5RexygFoDy1EW+ccOYcPfmHg==
+
gtoken@^6.1.0:
version "6.1.2"
resolved "https://registry.yarnpkg.com/gtoken/-/gtoken-6.1.2.tgz#aeb7bdb019ff4c3ba3ac100bbe7b6e74dce0e8bc"
@@ -2507,6 +2541,15 @@ vectordb@0.1.12:
"@apache-arrow/ts" "^12.0.0"
apache-arrow "^12.0.0"
+weaviate-ts-client@^1.4.0:
+ version "1.4.0"
+ resolved "https://registry.yarnpkg.com/weaviate-ts-client/-/weaviate-ts-client-1.4.0.tgz#e1adb670f2c1930a82601efb915b0131f6988b7e"
+ integrity sha512-G2V/IWMHXDjoJeATUYKkZXzAs7iRj4GE8B3AX59XDqMRW12X7VUkRgo4xWcHH1bjpLIHUYTzD5qZXcB8P9Hdmw==
+ dependencies:
+ graphql-request "^5.1.0"
+ isomorphic-fetch "^3.0.0"
+ uuid "^9.0.0"
+
webidl-conversions@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871"