Add support for Weaviate VectorDB (#181)

Timothy Carambat 2023-08-08 18:02:30 -07:00 committed by GitHub
parent 4ac5e55413
commit f3a6147ffd
18 changed files with 794 additions and 11 deletions

View File

@@ -1,5 +1,6 @@
{
"cSpell.words": [
"openai"
"openai",
"Weaviate"
]
}

View File

@@ -32,6 +32,11 @@ PINECONE_INDEX=
# Enable all below if you are using vector database: LanceDB.
# VECTOR_DB="lancedb"
# Enable all below if you are using vector database: Weaviate.
# VECTOR_DB="weaviate"
# WEAVIATE_ENDPOINT="http://localhost:8080"
# WEAVIATE_API_KEY=
# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# NO_DEBUG="true"

View File

@@ -7,7 +7,7 @@ import paths from "../../../../utils/paths";
const noop = () => false;
export default function ExportOrImportData({ hideModal = noop }) {
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative w-full w-full max-h-full">
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
<div className="flex flex-col items-start justify-between px-6 py-4">
<p className="text-gray-800 dark:text-stone-200 text-base ">

View File

@@ -37,7 +37,7 @@ export default function LLMSelection({
setHasChanges(!!error ? true : false);
};
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative w-full w-full max-h-full">
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
<div className="flex items-start justify-between px-6 py-4">
<p className="text-gray-800 dark:text-stone-200 text-base ">
@@ -59,7 +59,7 @@ export default function LLMSelection({
<p className="block text-sm font-medium text-gray-800 dark:text-slate-200">
LLM providers
</p>
<div className="w-full flex overflow-x-scroll gap-x-4 no-scroll">
<div className="w-full flex overflow-x-scroll gap-x-4">
<input hidden={true} name="LLMProvider" value={llmChoice} />
<LLMProviderOption
name="OpenAI"

View File

@@ -39,7 +39,7 @@ export default function MultiUserMode({ hideModal = noop }) {
};
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative w-full w-full max-h-full">
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
<div className="flex items-start justify-between px-6 py-4">
<p className="text-gray-800 dark:text-stone-200 text-base ">

View File

@@ -41,7 +41,7 @@ export default function PasswordProtection({
};
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative w-full w-full max-h-full">
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
<div className="flex items-start justify-between px-6 py-4">
<p className="text-gray-800 dark:text-stone-200 text-base ">

View File

@@ -3,6 +3,7 @@ import System from "../../../../models/system";
import ChromaLogo from "../../../../media/vectordbs/chroma.png";
import PineconeLogo from "../../../../media/vectordbs/pinecone.png";
import LanceDbLogo from "../../../../media/vectordbs/lancedb.png";
import WeaviateLogo from "../../../../media/vectordbs/weaviate.png";
const noop = () => false;
export default function VectorDBSelection({
@@ -37,7 +38,7 @@ export default function VectorDBSelection({
setHasChanges(!!error ? true : false);
};
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative w-full w-full max-h-full">
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
<div className="flex items-start justify-between px-6 py-4">
<p className="text-gray-800 dark:text-stone-200 text-base ">
@@ -59,7 +60,7 @@ export default function VectorDBSelection({
<p className="block text-sm font-medium text-gray-800 dark:text-slate-200">
Vector database providers
</p>
<div className="w-full flex overflow-x-scroll gap-x-4 no-scroll">
<div className="w-full flex overflow-x-scroll gap-x-4">
<input hidden={true} name="VectorDB" value={vectorDB} />
<VectorDBOption
name="Chroma"
@@ -79,6 +80,15 @@ export default function VectorDBSelection({
image={PineconeLogo}
onClick={updateVectorChoice}
/>
<VectorDBOption
name="Weaviate"
value="weaviate"
link="weaviate.io"
description="Open source local and cloud hosted multi-modal vector database."
checked={vectorDB === "weaviate"}
image={WeaviateLogo}
onClick={updateVectorChoice}
/>
<VectorDBOption
name="LanceDB"
value="lancedb"
@@ -171,6 +181,41 @@ export default function VectorDBSelection({
</p>
</div>
)}
{vectorDB === "weaviate" && (
<>
<div>
<label className="block mb-2 text-sm font-medium text-gray-800 dark:text-slate-200">
Weaviate Endpoint
</label>
<input
type="url"
name="WeaviateEndpoint"
disabled={!canDebug}
className="bg-gray-50 border border-gray-500 text-gray-900 placeholder-gray-500 text-sm rounded-lg dark:bg-stone-700 focus:border-stone-500 block w-full p-2.5 dark:text-slate-200 dark:placeholder-stone-500 dark:border-slate-200"
placeholder="http://localhost:8080"
defaultValue={settings?.WeaviateEndpoint}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div>
<label className="block mb-2 text-sm font-medium text-gray-800 dark:text-slate-200">
API Key
</label>
<input
type="password"
name="WeaviateApiKey"
disabled={!canDebug}
className="bg-gray-50 border border-gray-500 text-gray-900 placeholder-gray-500 text-sm rounded-lg dark:bg-stone-700 focus:border-stone-500 block w-full p-2.5 dark:text-slate-200 dark:placeholder-stone-500 dark:border-slate-200"
placeholder="sk-123Abcweaviate"
defaultValue={settings?.WeaviateApiKey}
autoComplete="off"
spellCheck={false}
/>
</div>
</>
)}
</div>
</div>
<div className="w-full p-4">

View File

@@ -46,7 +46,7 @@ export default function SystemSettingsModal({ hideModal = noop }) {
className="flex fixed top-0 left-0 right-0 w-full h-full"
onClick={hideModal}
/>
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative w-full w-full md:w-1/2 max-h-full">
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
<div className="flex flex-col gap-y-1 border-b dark:border-gray-600 px-4 pt-4 ">
<div className="flex items-start justify-between rounded-t ">

Binary file not shown (new image file added, 31 KiB).

View File

@@ -31,6 +31,12 @@ PINECONE_INDEX=
# Enable all below if you are using vector database: LanceDB.
# VECTOR_DB="lancedb"
# Enable all below if you are using vector database: Weaviate.
# VECTOR_DB="weaviate"
# WEAVIATE_ENDPOINT="http://localhost:8080"
# WEAVIATE_API_KEY=
# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# STORAGE_DIR= # absolute filesystem path with no trailing slash

View File

@@ -60,6 +60,12 @@ function systemEndpoints(app) {
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
}
: {}),
...(vectorDB === "weaviate"
? {
WeaviateEndpoint: process.env.WEAVIATE_ENDPOINT,
WeaviateApiKey: process.env.WEAVIATE_API_KEY,
}
: {}),
LLMProvider: llmProvider,
...(llmProvider === "openai"
? {

View File

@@ -26,6 +26,7 @@
"dotenv": "^16.0.3",
"express": "^4.18.2",
"extract-zip": "^2.0.1",
"graphql": "^16.7.1",
"jsonwebtoken": "^8.5.1",
"langchain": "^0.0.90",
"moment": "^2.29.4",
@@ -38,7 +39,8 @@
"sqlite3": "^5.1.6",
"uuid": "^9.0.0",
"uuid-apikey": "^1.5.3",
"vectordb": "0.1.12"
"vectordb": "0.1.12",
"weaviate-ts-client": "^1.4.0"
},
"devDependencies": {
"nodemon": "^2.0.22",

View File

@@ -0,0 +1,143 @@
const UPPERCASE = /[\p{Lu}]/u;
const LOWERCASE = /[\p{Ll}]/u;
const LEADING_CAPITAL = /^[\p{Lu}](?![\p{Lu}])/gu;
const IDENTIFIER = /([\p{Alpha}\p{N}_]|$)/u;
const SEPARATORS = /[_.\- ]+/;
const LEADING_SEPARATORS = new RegExp("^" + SEPARATORS.source);
const SEPARATORS_AND_IDENTIFIER = new RegExp(
SEPARATORS.source + IDENTIFIER.source,
"gu"
);
const NUMBERS_AND_IDENTIFIER = new RegExp("\\d+" + IDENTIFIER.source, "gu");
const preserveCamelCase = (
string,
toLowerCase,
toUpperCase,
preserveConsecutiveUppercase
) => {
let isLastCharLower = false;
let isLastCharUpper = false;
let isLastLastCharUpper = false;
let isLastLastCharPreserved = false;
for (let index = 0; index < string.length; index++) {
const character = string[index];
isLastLastCharPreserved = index > 2 ? string[index - 3] === "-" : true;
if (isLastCharLower && UPPERCASE.test(character)) {
string = string.slice(0, index) + "-" + string.slice(index);
isLastCharLower = false;
isLastLastCharUpper = isLastCharUpper;
isLastCharUpper = true;
index++;
} else if (
isLastCharUpper &&
isLastLastCharUpper &&
LOWERCASE.test(character) &&
(!isLastLastCharPreserved || preserveConsecutiveUppercase)
) {
string = string.slice(0, index - 1) + "-" + string.slice(index - 1);
isLastLastCharUpper = isLastCharUpper;
isLastCharUpper = false;
isLastCharLower = true;
} else {
isLastCharLower =
toLowerCase(character) === character &&
toUpperCase(character) !== character;
isLastLastCharUpper = isLastCharUpper;
isLastCharUpper =
toUpperCase(character) === character &&
toLowerCase(character) !== character;
}
}
return string;
};
const preserveConsecutiveUppercase = (input, toLowerCase) => {
LEADING_CAPITAL.lastIndex = 0;
return input.replace(LEADING_CAPITAL, (m1) => toLowerCase(m1));
};
const postProcess = (input, toUpperCase) => {
SEPARATORS_AND_IDENTIFIER.lastIndex = 0;
NUMBERS_AND_IDENTIFIER.lastIndex = 0;
return input
.replace(SEPARATORS_AND_IDENTIFIER, (_, identifier) =>
toUpperCase(identifier)
)
.replace(NUMBERS_AND_IDENTIFIER, (m) => toUpperCase(m));
};
function camelCase(input, options) {
if (!(typeof input === "string" || Array.isArray(input))) {
throw new TypeError("Expected the input to be `string | string[]`");
}
options = {
pascalCase: true,
preserveConsecutiveUppercase: false,
...options,
};
if (Array.isArray(input)) {
input = input
.map((x) => x.trim())
.filter((x) => x.length)
.join("-");
} else {
input = input.trim();
}
if (input.length === 0) {
return "";
}
const toLowerCase =
options.locale === false
? (string) => string.toLowerCase()
: (string) => string.toLocaleLowerCase(options.locale);
const toUpperCase =
options.locale === false
? (string) => string.toUpperCase()
: (string) => string.toLocaleUpperCase(options.locale);
if (input.length === 1) {
if (SEPARATORS.test(input)) {
return "";
}
return options.pascalCase ? toUpperCase(input) : toLowerCase(input);
}
const hasUpperCase = input !== toLowerCase(input);
if (hasUpperCase) {
input = preserveCamelCase(
input,
toLowerCase,
toUpperCase,
options.preserveConsecutiveUppercase
);
}
input = input.replace(LEADING_SEPARATORS, "");
input = options.preserveConsecutiveUppercase
? preserveConsecutiveUppercase(input, toLowerCase)
: toLowerCase(input);
if (options.pascalCase) {
input = toUpperCase(input.charAt(0)) + input.slice(1);
}
return postProcess(input, toUpperCase);
}
module.exports = {
camelCase,
};
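For reference, a small usage sketch of this helper (not part of the commit itself). With `pascalCase: true` as the default, arbitrary workspace names become PascalCase strings that are safe to use as Weaviate class names, which Weaviate expects to begin with an uppercase letter:
```js
// Illustrative only; the require path mirrors the one used by the Weaviate provider below.
const { camelCase } = require("../../helpers/camelcase");

console.log(camelCase("my workspace")); // "MyWorkspace"
console.log(camelCase("hello-world_docs")); // "HelloWorldDocs"
```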

View File

@@ -10,6 +10,9 @@ function getVectorDbClass() {
case "lancedb":
const { LanceDb } = require("../vectorDbProviders/lance");
return LanceDb;
case "weaviate":
const { Weaviate } = require("../vectorDbProviders/weaviate");
return Weaviate;
default:
throw new Error("ENV: No VECTOR_DB value found in environment!");
}
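A minimal sketch of how this selector is exercised at runtime (assuming the surrounding helpers module exports `getVectorDbClass`, as the other cases imply):
```js
// Hypothetical usage: the provider class is chosen from the environment.
const { getVectorDbClass } = require("./utils/helpers"); // illustrative path
process.env.VECTOR_DB = "weaviate";

const VectorDb = getVectorDbClass();
console.log(VectorDb.name); // "Weaviate" (see the provider definition below)
```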

View File

@@ -39,6 +39,15 @@ const KEY_MAPPING = {
envKey: "CHROMA_ENDPOINT",
checks: [isValidURL, validChromaURL],
},
WeaviateEndpoint: {
envKey: "WEAVIATE_ENDPOINT",
checks: [isValidURL],
},
WeaviateApiKey: {
envKey: "WEAVIATE_API_KEY",
checks: [],
},
PineConeEnvironment: {
envKey: "PINECONE_ENVIRONMENT",
checks: [],
@@ -103,7 +112,7 @@ function validOpenAIModel(input = "") {
}
function supportedVectorDB(input = "") {
const supported = ["chroma", "pinecone", "lancedb"];
const supported = ["chroma", "pinecone", "lancedb", "weaviate"];
return supported.includes(input)
? null
: `Invalid VectorDB type. Must be one of ${supported.join(", ")}.`;
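A hedged sketch (not the repo's actual update routine) of how a `KEY_MAPPING` entry is consumed: each check is assumed to return `null` on success or an error string, matching the contract `supportedVectorDB` follows above:
```js
// Hypothetical validator over KEY_MAPPING; envKey/checks come from the entries above.
function validateSetting(key, value) {
  const mapping = KEY_MAPPING[key];
  if (!mapping) return `${key} is not a supported setting.`;
  for (const check of mapping.checks) {
    const error = check(value);
    if (error) return error;
  }
  return null; // safe to write value to process.env[mapping.envKey]
}

// validateSetting("WeaviateEndpoint", "http://localhost:8080") -> null when isValidURL passes
// validateSetting("WeaviateApiKey", "") -> null, since its checks array is empty
```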

View File

@@ -0,0 +1,17 @@
# How to set up a local (or cloud) Weaviate Vector Database
[Get a Weaviate Cloud instance](https://weaviate.io/developers/weaviate/quickstart#create-an-instance).
[Set up Weaviate locally on Docker](https://weaviate.io/developers/weaviate/installation/docker-compose).
In the "Vector Database" tab of Settings, select Weaviate as your provider and fill out the appropriate fields with the information from either of the above steps.
### How to get started _Development mode only_
After setting up either a Weaviate Cloud or local Dockerized instance, set these variables in `.env.development` or define them at runtime via the UI.
```
VECTOR_DB="weaviate"
WEAVIATE_ENDPOINT='http://localhost:8080'
WEAVIATE_API_KEY= # Optional
```
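To sanity-check those values before booting the server, here is a short connectivity probe using `weaviate-ts-client`; it makes the same calls the provider's `connect()` does, and the filename is only illustrative:
```js
// check-weaviate.js - hypothetical standalone probe for the endpoint configured above.
const { default: weaviate } = require("weaviate-ts-client");

async function probe() {
  const url = new URL(process.env.WEAVIATE_ENDPOINT || "http://localhost:8080");
  const client = weaviate.client({
    scheme: url.protocol.replace(":", ""),
    host: url.host,
    ...(process.env.WEAVIATE_API_KEY
      ? { apiKey: new weaviate.ApiKey(process.env.WEAVIATE_API_KEY) }
      : {}),
  });
  const alive = await client.misc.liveChecker().do();
  console.log(alive ? "Weaviate is reachable." : "Weaviate did not respond.");
}

probe();
```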

View File

@@ -0,0 +1,503 @@
const { default: weaviate } = require("weaviate-ts-client");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const { toChunks, getLLMProvider } = require("../../helpers");
const { chatPrompt } = require("../../chats");
const { camelCase } = require("../../helpers/camelcase");
const Weaviate = {
name: "Weaviate",
connect: async function () {
if (process.env.VECTOR_DB !== "weaviate")
throw new Error("Weaviate::Invalid ENV settings");
const weaviateUrl = new URL(process.env.WEAVIATE_ENDPOINT);
const options = {
scheme: weaviateUrl.protocol?.replace(":", "") || "http",
host: weaviateUrl?.host,
...(process.env?.WEAVIATE_API_KEY?.length > 0
? { apiKey: new weaviate.ApiKey(process.env?.WEAVIATE_API_KEY) }
: {}),
};
const client = weaviate.client(options);
const isAlive = await client.misc.liveChecker().do();
if (!isAlive)
throw new Error(
"Weaviate::Invalid Alive signal received - is the service online?"
);
return { client };
},
heartbeat: async function () {
await this.connect();
return { heartbeat: Number(new Date()) };
},
totalIndicies: async function () {
const { client } = await this.connect();
const collectionNames = await this.allNamespaces(client);
var totalVectors = 0;
for (const name of collectionNames) {
totalVectors += await this.namespaceCountWithClient(client, name);
}
return totalVectors;
},
namespaceCountWithClient: async function (client, namespace) {
try {
const response = await client.graphql
.aggregate()
.withClassName(camelCase(namespace))
.withFields("meta { count }")
.do();
return (
response?.data?.Aggregate?.[camelCase(namespace)]?.[0]?.meta?.count || 0
);
} catch (e) {
console.error(`Weaviate:namespaceCountWithClient`, e.message);
return 0;
}
},
namespaceCount: async function (namespace = null) {
try {
const { client } = await this.connect();
const response = await client.graphql
.aggregate()
.withClassName(camelCase(namespace))
.withFields("meta { count }")
.do();
return (
response?.data?.Aggregate?.[camelCase(namespace)]?.[0]?.meta?.count || 0
);
} catch (e) {
console.error(`Weaviate:namespaceCountWithClient`, e.message);
return 0;
}
},
similarityResponse: async function (client, namespace, queryVector) {
const result = {
contextTexts: [],
sourceDocuments: [],
};
const weaviateClass = await this.namespace(client, namespace);
const fields = weaviateClass.properties.map((prop) => prop.name).join(" ");
const queryResponse = await client.graphql
.get()
.withClassName(camelCase(namespace))
.withFields(`${fields} _additional { id }`)
.withNearVector({ vector: queryVector })
.withLimit(4)
.do();
const responses = queryResponse?.data?.Get?.[camelCase(namespace)];
responses.forEach((response) => {
// In Weaviate we have to pluck id from _additional and spread it into the rest
// of the properties.
const {
_additional: { id },
...rest
} = response;
result.contextTexts.push(rest.text);
result.sourceDocuments.push({ ...rest, id });
});
return result;
},
allNamespaces: async function (client) {
try {
const { classes = [] } = await client.schema.getter().do();
return classes.map((classObj) => classObj.class);
} catch (e) {
console.error("Weaviate::AllNamespace", e);
return [];
}
},
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
if (!(await this.namespaceExists(client, namespace))) return null;
const weaviateClass = await client.schema
.classGetter()
.withClassName(camelCase(namespace))
.do();
return {
...weaviateClass,
vectorCount: await this.namespaceCount(namespace),
};
},
addVectors: async function (client, vectors = []) {
const response = { success: true, errors: new Set([]) };
const results = await client.batch
.objectsBatcher()
.withObjects(...vectors)
.do();
results.forEach((res) => {
const { status, errors = [] } = res.result;
if (status === "SUCCESS" || errors.length === 0) return;
response.success = false;
response.errors.add(errors.error?.[0]?.message || null);
});
response.errors = [...response.errors];
return response;
},
hasNamespace: async function (namespace = null) {
if (!namespace) return false;
const { client } = await this.connect();
const weaviateClasses = await this.allNamespaces(client);
return weaviateClasses.includes(camelCase(namespace));
},
namespaceExists: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const weaviateClasses = await this.allNamespaces(client);
return weaviateClasses.includes(camelCase(namespace));
},
deleteVectorsInNamespace: async function (client, namespace = null) {
await client.schema.classDeleter().withClassName(camelCase(namespace)).do();
return true;
},
addDocumentToNamespace: async function (
namespace,
documentData = {},
fullFilePath = null
) {
const { DocumentVectors } = require("../../../models/vectors");
try {
const {
pageContent,
docId,
id: _id, // Weaviate will abort if `id` is present in properties
...metadata
} = documentData;
if (!pageContent || pageContent.length == 0) return false;
console.log("Adding new vectorized document into namespace", namespace);
const cacheResult = await cachedVectorInformation(fullFilePath);
if (cacheResult.exists) {
const { client } = await this.connect();
const weaviateClassExits = await this.hasNamespace(namespace);
if (!weaviateClassExits) {
await client.schema
.classCreator()
.withClass({
class: camelCase(namespace),
description: `Class created by AnythingLLM named ${camelCase(
namespace
)}`,
vectorizer: "none",
})
.do();
}
const { chunks } = cacheResult;
const documentVectors = [];
const vectors = [];
for (const chunk of chunks) {
// Before sending to Weaviate and saving the records to our db
// we need to assign the id of each chunk that is stored in the cached file.
chunk.forEach((chunk) => {
const id = uuidv4();
const flattenedMetadata = this.flattenObjectForWeaviate(
chunk.properties
);
documentVectors.push({ docId, vectorId: id });
const vectorRecord = {
id,
class: camelCase(namespace),
vector: chunk.vector || chunk.values || [],
properties: { ...flattenedMetadata },
};
vectors.push(vectorRecord);
});
const { success: additionResult, errors = [] } =
await this.addVectors(client, vectors);
if (!additionResult) {
console.error("Weaviate::addVectors failed to insert", errors);
throw new Error("Error embedding into Weaviate");
}
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
}
// If we are here then we are going to embed and store a novel document.
// We have to do this manually as opposed to using LangChain's `Chroma.fromDocuments`
// because we would then be unable to atomically control our namespace to granularly
// find/remove documents from the vectordb.
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkOverlap: 20,
});
const textChunks = await textSplitter.splitText(pageContent);
console.log("Chunks created from document:", textChunks.length);
const LLMConnector = getLLMProvider();
const documentVectors = [];
const vectors = [];
const vectorValues = await LLMConnector.embedChunks(textChunks);
const submission = {
ids: [],
vectors: [],
properties: [],
};
if (!!vectorValues && vectorValues.length > 0) {
for (const [i, vector] of vectorValues.entries()) {
const flattenedMetadata = this.flattenObjectForWeaviate(metadata);
const vectorRecord = {
class: camelCase(namespace),
id: uuidv4(),
vector: vector,
// [DO NOT REMOVE]
// LangChain will be unable to find your text if you embed manually and don't include the `text` key.
// https://github.com/hwchase17/langchainjs/blob/5485c4af50c063e257ad54f4393fa79e0aff6462/langchain/src/vectorstores/weaviate.ts#L133
properties: { ...flattenedMetadata, text: textChunks[i] },
};
submission.ids.push(vectorRecord.id);
submission.vectors.push(vectorRecord.values);
submission.properties.push(metadata);
vectors.push(vectorRecord);
documentVectors.push({ docId, vectorId: vectorRecord.id });
}
} else {
console.error(
"Could not use OpenAI to embed document chunks! This document will not be recorded."
);
}
const { client } = await this.connect();
const weaviateClassExits = await this.hasNamespace(namespace);
if (!weaviateClassExits) {
await client.schema
.classCreator()
.withClass({
class: camelCase(namespace),
description: `Class created by AnythingLLM named ${camelCase(
namespace
)}`,
vectorizer: "none",
})
.do();
}
if (vectors.length > 0) {
const chunks = [];
for (const chunk of toChunks(vectors, 500)) chunks.push(chunk);
console.log("Inserting vectorized chunks into Weaviate collection.");
const { success: additionResult, errors = [] } = await this.addVectors(
client,
vectors
);
if (!additionResult) {
console.error("Weaviate::addVectors failed to insert", errors);
throw new Error("Error embedding into Weaviate");
}
await storeVectorResult(chunks, fullFilePath);
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {
const { DocumentVectors } = require("../../../models/vectors");
const { client } = await this.connect();
if (!(await this.namespaceExists(client, namespace))) return;
const knownDocuments = await DocumentVectors.where(`docId = '${docId}'`);
if (knownDocuments.length === 0) return;
for (const doc of knownDocuments) {
await client.data
.deleter()
.withClassName(camelCase(namespace))
.withId(doc.vectorId)
.do();
}
const indexes = knownDocuments.map((doc) => doc.id);
await DocumentVectors.deleteIds(indexes);
return true;
},
query: async function (reqBody = {}) {
const { namespace = null, input, workspace = {} } = reqBody;
if (!namespace || !input) throw new Error("Invalid request body");
const { client } = await this.connect();
if (!(await this.namespaceExists(client, namespace))) {
return {
response: null,
sources: [],
message: "Invalid query - no documents found for workspace!",
};
}
const LLMConnector = getLLMProvider();
const queryVector = await LLMConnector.embedTextInput(input);
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
);
const prompt = {
role: "system",
content: `${chatPrompt(workspace)}
Context:
${contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")}`,
};
const memory = [prompt, { role: "user", content: input }];
const responseText = await LLMConnector.getChatCompletion(memory, {
temperature: workspace?.openAiTemp ?? 0.7,
});
return {
response: responseText,
sources: this.curateSources(sourceDocuments),
message: false,
};
},
// This implementation of chat uses the chat history and modifies the system prompt at execution.
// This is an improvement over the regular LangChain implementation so that chats do not directly modify embeddings;
// otherwise, with multi-user support, every conversation would mutate the base vector collection, and the only
// solution would be replicating entire vector databases per user - which would very quickly consume space on vector DBs.
chat: async function (reqBody = {}) {
const {
namespace = null,
input,
workspace = {},
chatHistory = [],
} = reqBody;
if (!namespace || !input) throw new Error("Invalid request body");
const { client } = await this.connect();
if (!(await this.namespaceExists(client, namespace))) {
return {
response: null,
sources: [],
message: "Invalid query - no documents found for workspace!",
};
}
const LLMConnector = getLLMProvider();
const queryVector = await LLMConnector.embedTextInput(input);
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
);
const prompt = {
role: "system",
content: `${chatPrompt(workspace)}
Context:
${contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")}`,
};
const memory = [prompt, ...chatHistory, { role: "user", content: input }];
const responseText = await LLMConnector.getChatCompletion(memory, {
temperature: workspace?.openAiTemp ?? 0.7,
});
return {
response: responseText,
sources: this.curateSources(sourceDocuments),
message: false,
};
},
"namespace-stats": async function (reqBody = {}) {
const { namespace = null } = reqBody;
if (!namespace) throw new Error("namespace required");
const { client } = await this.connect();
const stats = await this.namespace(client, namespace);
return stats
? stats
: { message: "No stats were able to be fetched from DB for namespace" };
},
"delete-namespace": async function (reqBody = {}) {
const { namespace = null } = reqBody;
const { client } = await this.connect();
const details = await this.namespace(client, namespace);
await this.deleteVectorsInNamespace(client, namespace);
return {
message: `Namespace ${camelCase(namespace)} was deleted along with ${details?.vectorCount
} vectors.`,
};
},
reset: async function () {
const { client } = await this.connect();
const weaviateClasses = await this.allNamespaces(client);
for (const weaviateClass of weaviateClasses) {
await client.schema.classDeleter().withClassName(weaviateClass).do();
}
return { reset: true };
},
curateSources: function (sources = []) {
const documents = [];
for (const source of sources) {
if (Object.keys(source).length > 0) {
documents.push(source);
}
}
return documents;
},
flattenObjectForWeaviate: function (obj = {}) {
// Note this function is not generic, it is designed specifically for Weaviate
// https://weaviate.io/developers/weaviate/config-refs/datatypes#introduction
// Credit to LangchainJS
// https://github.com/hwchase17/langchainjs/blob/5485c4af50c063e257ad54f4393fa79e0aff6462/langchain/src/vectorstores/weaviate.ts#L11C1-L50C3
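// Illustrative example (not part of the original commit):
//   { title: "Doc", meta: { page: 1 }, tags: ["a", "b"], published: true }
//   -> { title: "Doc", meta_page: 1, tags: ["a", "b"], published: true }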
const flattenedObject = {};
for (const key in obj) {
if (!Object.hasOwn(obj, key)) {
continue;
}
const value = obj[key];
if (typeof obj[key] === "object" && !Array.isArray(value)) {
const recursiveResult = this.flattenObjectForWeaviate(value);
for (const deepKey in recursiveResult) {
if (Object.hasOwn(obj, key)) {
flattenedObject[`${key}_${deepKey}`] = recursiveResult[deepKey];
}
}
} else if (Array.isArray(value)) {
if (
value.length > 0 &&
typeof value[0] !== "object" &&
// eslint-disable-next-line @typescript-eslint/no-explicit-any
value.every((el) => typeof el === typeof value[0])
) {
// Weaviate only supports arrays of primitive types,
// where all elements are of the same type
flattenedObject[key] = value;
}
} else {
flattenedObject[key] = value;
}
}
return flattenedObject;
},
};
module.exports.Weaviate = Weaviate;
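For orientation, a hedged end-to-end sketch of how the server side consumes this provider. The function names come from the object above; the calling code, paths, and literal values are illustrative rather than the actual endpoint handlers:
```js
const { Weaviate } = require("./utils/vectorDbProviders/weaviate"); // illustrative path

async function example() {
  // Liveness check against WEAVIATE_ENDPOINT / WEAVIATE_API_KEY.
  await Weaviate.heartbeat();

  // Embed and store a processed document into a workspace namespace
  // (the namespace is camelCased into a Weaviate class name, e.g. "MyWorkspace").
  await Weaviate.addDocumentToNamespace(
    "my-workspace",
    { docId: "doc-123", pageContent: "Hello from AnythingLLM.", title: "Hello" },
    "/absolute/path/to/cached/vector-cache.json" // illustrative cache path
  );

  // Query that namespace; returns { response, sources, message }.
  const { response, sources } = await Weaviate.query({
    namespace: "my-workspace",
    input: "What does the document say?",
    workspace: { openAiTemp: 0.7 },
  });
  console.log(response, sources);
}

example();
```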

View File

@@ -130,6 +130,11 @@
dependencies:
googleapis-common "^6.0.3"
"@graphql-typed-document-node/core@^3.1.1":
version "3.2.0"
resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.2.0.tgz#5f3d96ec6b2354ad6d8a28bf216a1d97b5426861"
integrity sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==
"@mapbox/node-pre-gyp@^1.0.0", "@mapbox/node-pre-gyp@^1.0.10":
version "1.0.11"
resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz#417db42b7f5323d79e93b34a6d7a2a12c0df43fa"
@@ -916,6 +921,11 @@ extend@^3.0.2:
resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
extract-files@^9.0.0:
version "9.0.0"
resolved "https://registry.yarnpkg.com/extract-files/-/extract-files-9.0.0.tgz#8a7744f2437f81f5ed3250ed9f1550de902fe54a"
integrity sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ==
extract-zip@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
@@ -981,6 +991,15 @@ follow-redirects@^1.14.8:
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.2.tgz#b460864144ba63f2681096f274c4e57026da2c13"
integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==
form-data@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f"
integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==
dependencies:
asynckit "^0.4.0"
combined-stream "^1.0.8"
mime-types "^2.1.12"
form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
@@ -1149,6 +1168,21 @@ graceful-fs@^4.2.0, graceful-fs@^4.2.6:
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3"
integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==
graphql-request@^5.1.0:
version "5.2.0"
resolved "https://registry.yarnpkg.com/graphql-request/-/graphql-request-5.2.0.tgz#a05fb54a517d91bb2d7aefa17ade4523dc5ebdca"
integrity sha512-pLhKIvnMyBERL0dtFI3medKqWOz/RhHdcgbZ+hMMIb32mEPa5MJSzS4AuXxfI4sRAu6JVVk5tvXuGfCWl9JYWQ==
dependencies:
"@graphql-typed-document-node/core" "^3.1.1"
cross-fetch "^3.1.5"
extract-files "^9.0.0"
form-data "^3.0.0"
graphql@^16.7.1:
version "16.7.1"
resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.7.1.tgz#11475b74a7bff2aefd4691df52a0eca0abd9b642"
integrity sha512-DRYR9tf+UGU0KOsMcKAlXeFfX89UiiIZ0dRU3mR0yJfu6OjZqUcp68NnFLnqQU5RexygFoDy1EW+ccOYcPfmHg==
gtoken@^6.1.0:
version "6.1.2"
resolved "https://registry.yarnpkg.com/gtoken/-/gtoken-6.1.2.tgz#aeb7bdb019ff4c3ba3ac100bbe7b6e74dce0e8bc"
@@ -2507,6 +2541,15 @@ vectordb@0.1.12:
"@apache-arrow/ts" "^12.0.0"
apache-arrow "^12.0.0"
weaviate-ts-client@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/weaviate-ts-client/-/weaviate-ts-client-1.4.0.tgz#e1adb670f2c1930a82601efb915b0131f6988b7e"
integrity sha512-G2V/IWMHXDjoJeATUYKkZXzAs7iRj4GE8B3AX59XDqMRW12X7VUkRgo4xWcHH1bjpLIHUYTzD5qZXcB8P9Hdmw==
dependencies:
graphql-request "^5.1.0"
isomorphic-fetch "^3.0.0"
uuid "^9.0.0"
webidl-conversions@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871"