mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-18 20:20:11 +01:00
Add support for Weaviate VectorDB (#181)
This commit is contained in:
parent
4ac5e55413
commit
f3a6147ffd
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@ -1,5 +1,6 @@
|
||||
{
|
||||
"cSpell.words": [
|
||||
"openai"
|
||||
"openai",
|
||||
"Weaviate"
|
||||
]
|
||||
}
|
@ -32,6 +32,11 @@ PINECONE_INDEX=
|
||||
# Enable all below if you are using vector database: LanceDB.
|
||||
# VECTOR_DB="lancedb"
|
||||
|
||||
# Enable all below if you are using vector database: Weaviate.
|
||||
# VECTOR_DB="weaviate"
|
||||
# WEAVIATE_ENDPOINT="http://localhost:8080"
|
||||
# WEAVIATE_API_KEY=
|
||||
|
||||
# CLOUD DEPLOYMENT VARIABLES ONLY
|
||||
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||
# NO_DEBUG="true"
|
||||
|
@ -7,7 +7,7 @@ import paths from "../../../../utils/paths";
|
||||
const noop = () => false;
|
||||
export default function ExportOrImportData({ hideModal = noop }) {
|
||||
return (
|
||||
<div className="relative w-full max-w-2xl max-h-full">
|
||||
<div className="relative w-full w-full max-h-full">
|
||||
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
|
||||
<div className="flex flex-col items-start justify-between px-6 py-4">
|
||||
<p className="text-gray-800 dark:text-stone-200 text-base ">
|
||||
|
@ -37,7 +37,7 @@ export default function LLMSelection({
|
||||
setHasChanges(!!error ? true : false);
|
||||
};
|
||||
return (
|
||||
<div className="relative w-full max-w-2xl max-h-full">
|
||||
<div className="relative w-full w-full max-h-full">
|
||||
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
|
||||
<div className="flex items-start justify-between px-6 py-4">
|
||||
<p className="text-gray-800 dark:text-stone-200 text-base ">
|
||||
@ -59,7 +59,7 @@ export default function LLMSelection({
|
||||
<p className="block text-sm font-medium text-gray-800 dark:text-slate-200">
|
||||
LLM providers
|
||||
</p>
|
||||
<div className="w-full flex overflow-x-scroll gap-x-4 no-scroll">
|
||||
<div className="w-full flex overflow-x-scroll gap-x-4">
|
||||
<input hidden={true} name="LLMProvider" value={llmChoice} />
|
||||
<LLMProviderOption
|
||||
name="OpenAI"
|
||||
|
@ -39,7 +39,7 @@ export default function MultiUserMode({ hideModal = noop }) {
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="relative w-full max-w-2xl max-h-full">
|
||||
<div className="relative w-full w-full max-h-full">
|
||||
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
|
||||
<div className="flex items-start justify-between px-6 py-4">
|
||||
<p className="text-gray-800 dark:text-stone-200 text-base ">
|
||||
|
@ -41,7 +41,7 @@ export default function PasswordProtection({
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="relative w-full max-w-2xl max-h-full">
|
||||
<div className="relative w-full w-full max-h-full">
|
||||
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
|
||||
<div className="flex items-start justify-between px-6 py-4">
|
||||
<p className="text-gray-800 dark:text-stone-200 text-base ">
|
||||
|
@ -3,6 +3,7 @@ import System from "../../../../models/system";
|
||||
import ChromaLogo from "../../../../media/vectordbs/chroma.png";
|
||||
import PineconeLogo from "../../../../media/vectordbs/pinecone.png";
|
||||
import LanceDbLogo from "../../../../media/vectordbs/lancedb.png";
|
||||
import WeaviateLogo from "../../../../media/vectordbs/weaviate.png";
|
||||
|
||||
const noop = () => false;
|
||||
export default function VectorDBSelection({
|
||||
@ -37,7 +38,7 @@ export default function VectorDBSelection({
|
||||
setHasChanges(!!error ? true : false);
|
||||
};
|
||||
return (
|
||||
<div className="relative w-full max-w-2xl max-h-full">
|
||||
<div className="relative w-full w-full max-h-full">
|
||||
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
|
||||
<div className="flex items-start justify-between px-6 py-4">
|
||||
<p className="text-gray-800 dark:text-stone-200 text-base ">
|
||||
@ -59,7 +60,7 @@ export default function VectorDBSelection({
|
||||
<p className="block text-sm font-medium text-gray-800 dark:text-slate-200">
|
||||
Vector database providers
|
||||
</p>
|
||||
<div className="w-full flex overflow-x-scroll gap-x-4 no-scroll">
|
||||
<div className="w-full flex overflow-x-scroll gap-x-4">
|
||||
<input hidden={true} name="VectorDB" value={vectorDB} />
|
||||
<VectorDBOption
|
||||
name="Chroma"
|
||||
@ -79,6 +80,15 @@ export default function VectorDBSelection({
|
||||
image={PineconeLogo}
|
||||
onClick={updateVectorChoice}
|
||||
/>
|
||||
<VectorDBOption
|
||||
name="Weaviate"
|
||||
value="weaviate"
|
||||
link="weaviate.io"
|
||||
description="Open source local and cloud hosted multi-modal vector database."
|
||||
checked={vectorDB === "weaviate"}
|
||||
image={WeaviateLogo}
|
||||
onClick={updateVectorChoice}
|
||||
/>
|
||||
<VectorDBOption
|
||||
name="LanceDB"
|
||||
value="lancedb"
|
||||
@ -171,6 +181,41 @@ export default function VectorDBSelection({
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
{vectorDB === "weaviate" && (
|
||||
<>
|
||||
<div>
|
||||
<label className="block mb-2 text-sm font-medium text-gray-800 dark:text-slate-200">
|
||||
Weaviate Endpoint
|
||||
</label>
|
||||
<input
|
||||
type="url"
|
||||
name="WeaviateEndpoint"
|
||||
disabled={!canDebug}
|
||||
className="bg-gray-50 border border-gray-500 text-gray-900 placeholder-gray-500 text-sm rounded-lg dark:bg-stone-700 focus:border-stone-500 block w-full p-2.5 dark:text-slate-200 dark:placeholder-stone-500 dark:border-slate-200"
|
||||
placeholder="http://localhost:8080"
|
||||
defaultValue={settings?.WeaviateEndpoint}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block mb-2 text-sm font-medium text-gray-800 dark:text-slate-200">
|
||||
Api Key
|
||||
</label>
|
||||
<input
|
||||
type="password"
|
||||
name="WeaviateApiKey"
|
||||
disabled={!canDebug}
|
||||
className="bg-gray-50 border border-gray-500 text-gray-900 placeholder-gray-500 text-sm rounded-lg dark:bg-stone-700 focus:border-stone-500 block w-full p-2.5 dark:text-slate-200 dark:placeholder-stone-500 dark:border-slate-200"
|
||||
placeholder="sk-123Abcweaviate"
|
||||
defaultValue={settings?.WeaviateApiKey}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="w-full p-4">
|
||||
|
@ -46,7 +46,7 @@ export default function SystemSettingsModal({ hideModal = noop }) {
|
||||
className="flex fixed top-0 left-0 right-0 w-full h-full"
|
||||
onClick={hideModal}
|
||||
/>
|
||||
<div className="relative w-full max-w-2xl max-h-full">
|
||||
<div className="relative w-full w-full md:w-1/2 max-h-full">
|
||||
<div className="relative bg-white rounded-lg shadow dark:bg-stone-700">
|
||||
<div className="flex flex-col gap-y-1 border-b dark:border-gray-600 px-4 pt-4 ">
|
||||
<div className="flex items-start justify-between rounded-t ">
|
||||
|
BIN
frontend/src/media/vectordbs/weaviate.png
Normal file
BIN
frontend/src/media/vectordbs/weaviate.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 31 KiB |
@ -31,6 +31,12 @@ PINECONE_INDEX=
|
||||
# Enable all below if you are using vector database: LanceDB.
|
||||
# VECTOR_DB="lancedb"
|
||||
|
||||
# Enable all below if you are using vector database: Weaviate.
|
||||
# VECTOR_DB="weaviate"
|
||||
# WEAVIATE_ENDPOINT="http://localhost:8080"
|
||||
# WEAVIATE_API_KEY=
|
||||
|
||||
|
||||
# CLOUD DEPLOYMENT VARIABLES ONLY
|
||||
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||
# STORAGE_DIR= # absolute filesystem path with no trailing slash
|
||||
|
@ -60,6 +60,12 @@ function systemEndpoints(app) {
|
||||
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
|
||||
}
|
||||
: {}),
|
||||
...(vectorDB === "weaviate"
|
||||
? {
|
||||
WeaviateEndpoint: process.env.WEAVIATE_ENDPOINT,
|
||||
WeaviateApiKey: process.env.WEAVIATE_API_KEY,
|
||||
}
|
||||
: {}),
|
||||
LLMProvider: llmProvider,
|
||||
...(llmProvider === "openai"
|
||||
? {
|
||||
|
@ -26,6 +26,7 @@
|
||||
"dotenv": "^16.0.3",
|
||||
"express": "^4.18.2",
|
||||
"extract-zip": "^2.0.1",
|
||||
"graphql": "^16.7.1",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"langchain": "^0.0.90",
|
||||
"moment": "^2.29.4",
|
||||
@ -38,7 +39,8 @@
|
||||
"sqlite3": "^5.1.6",
|
||||
"uuid": "^9.0.0",
|
||||
"uuid-apikey": "^1.5.3",
|
||||
"vectordb": "0.1.12"
|
||||
"vectordb": "0.1.12",
|
||||
"weaviate-ts-client": "^1.4.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"nodemon": "^2.0.22",
|
||||
|
143
server/utils/helpers/camelcase.js
Normal file
143
server/utils/helpers/camelcase.js
Normal file
@ -0,0 +1,143 @@
|
||||
// Single-character classifiers (Unicode aware).
const UPPERCASE = /[\p{Lu}]/u;
const LOWERCASE = /[\p{Ll}]/u;
// A capital at the very start of the string that is NOT followed by another capital.
const LEADING_CAPITAL = /^[\p{Lu}](?![\p{Lu}])/gu;
// One identifier character (letter, number or underscore) or the end of the string.
const IDENTIFIER = /([\p{Alpha}\p{N}_]|$)/u;
// A run of word separators: underscore, dot, dash or space.
const SEPARATORS = /[_.\- ]+/;

// Composite patterns assembled from the sources above.
// The "g" flag makes these stateful; users reset `lastIndex` before each use.
const LEADING_SEPARATORS = new RegExp(`^${SEPARATORS.source}`);
const SEPARATORS_AND_IDENTIFIER = new RegExp(
  `${SEPARATORS.source}${IDENTIFIER.source}`,
  "gu"
);
const NUMBERS_AND_IDENTIFIER = new RegExp(`\\d+${IDENTIFIER.source}`, "gu");
|
||||
|
||||
/**
 * Inserts "-" at camelCase word boundaries so later steps can treat every
 * boundary as a separator.
 *
 * @param {string} string - Text to scan; a rewritten copy is returned.
 * @param {(s: string) => string} toLowerCase - Lower-casing function.
 * @param {(s: string) => string} toUpperCase - Upper-casing function.
 * @param {boolean} preserveConsecutiveUppercase - When truthy, always split
 *   before the last capital of an uppercase run that is followed by a lowercase letter.
 * @returns {string} The input with dashes inserted at detected boundaries.
 */
const preserveCamelCase = (
  string,
  toLowerCase,
  toUpperCase,
  preserveConsecutiveUppercase
) => {
  let prevIsLower = false;
  let prevIsUpper = false;
  let prevPrevIsUpper = false;

  for (let index = 0; index < string.length; index++) {
    const character = string[index];
    // True when the character three positions back is a dash (or we are
    // still within the first three characters).
    const prevPrevPreserved = index > 2 ? string[index - 3] === "-" : true;

    // lower -> UPPER transition: split right before the current character.
    if (prevIsLower && UPPERCASE.test(character)) {
      string = `${string.slice(0, index)}-${string.slice(index)}`;
      prevIsLower = false;
      prevPrevIsUpper = prevIsUpper;
      prevIsUpper = true;
      // Skip over the dash that was just inserted.
      index++;
      continue;
    }

    // UPPER UPPER lower: split before the previous (last uppercase) character.
    if (
      prevIsUpper &&
      prevPrevIsUpper &&
      LOWERCASE.test(character) &&
      (!prevPrevPreserved || preserveConsecutiveUppercase)
    ) {
      const cut = index - 1;
      string = `${string.slice(0, cut)}-${string.slice(cut)}`;
      prevPrevIsUpper = prevIsUpper;
      prevIsUpper = false;
      prevIsLower = true;
      continue;
    }

    // No boundary here: just record the case of the current character.
    const lowered = toLowerCase(character);
    const uppered = toUpperCase(character);
    prevIsLower = lowered === character && uppered !== character;
    prevPrevIsUpper = prevIsUpper;
    prevIsUpper = uppered === character && lowered !== character;
  }

  return string;
};
|
||||
|
||||
/**
 * Lower-cases a single leading capital that is not followed by another
 * capital; the rest of the input is returned untouched.
 *
 * @param {string} input - Text to process.
 * @param {(s: string) => string} toLowerCase - Lower-casing function.
 * @returns {string} The processed text.
 */
function preserveConsecutiveUppercase(input, toLowerCase) {
  // LEADING_CAPITAL carries the "g" flag, so clear any leftover state first.
  LEADING_CAPITAL.lastIndex = 0;

  const lowerLeading = (matched) => toLowerCase(matched);
  return input.replace(LEADING_CAPITAL, lowerLeading);
}
|
||||
|
||||
/**
 * Final pass: removes each separator run while upper-casing the identifier
 * character that follows it, then upper-cases the character that follows a
 * run of digits.
 *
 * @param {string} input - Text that still contains separators.
 * @param {(s: string) => string} toUpperCase - Upper-casing function.
 * @returns {string} The joined, re-cased text.
 */
function postProcess(input, toUpperCase) {
  // Both patterns are global and therefore stateful; reset before use.
  SEPARATORS_AND_IDENTIFIER.lastIndex = 0;
  NUMBERS_AND_IDENTIFIER.lastIndex = 0;

  const withoutSeparators = input.replace(
    SEPARATORS_AND_IDENTIFIER,
    (_match, identifier) => toUpperCase(identifier)
  );

  return withoutSeparators.replace(NUMBERS_AND_IDENTIFIER, (matched) =>
    toUpperCase(matched)
  );
}
|
||||
|
||||
/**
 * Converts a string (or a list of strings, joined with "-") into a single
 * cased identifier. NOTE: unlike the name suggests, `pascalCase` defaults to
 * `true` here, so the first character is upper-cased unless the caller opts out.
 *
 * @param {string|string[]} input - Text, or pieces of text, to convert.
 * @param {object} [options]
 * @param {boolean} [options.pascalCase=true] - Upper-case the first character.
 * @param {boolean} [options.preserveConsecutiveUppercase=false] - Keep runs of capitals.
 * @param {string|string[]|false} [options.locale] - Locale for case mapping;
 *   `false` uses the locale-independent `toLowerCase`/`toUpperCase`.
 * @returns {string} The converted identifier ("" for empty/separator-only input).
 * @throws {TypeError} When `input` is neither a string nor an array.
 */
function camelCase(input, options) {
  const isList = Array.isArray(input);
  if (typeof input !== "string" && !isList) {
    throw new TypeError("Expected the input to be `string | string[]`");
  }

  const settings = {
    pascalCase: true,
    preserveConsecutiveUppercase: false,
    ...options,
  };

  let text = isList
    ? input
        .map((piece) => piece.trim())
        .filter((piece) => piece.length)
        .join("-")
    : input.trim();

  if (text.length === 0) return "";

  const localeAware = settings.locale !== false;
  const toLowerCase = localeAware
    ? (string) => string.toLocaleLowerCase(settings.locale)
    : (string) => string.toLowerCase();
  const toUpperCase = localeAware
    ? (string) => string.toLocaleUpperCase(settings.locale)
    : (string) => string.toUpperCase();

  // A lone character is either a separator (dropped) or simply re-cased.
  if (text.length === 1) {
    if (SEPARATORS.test(text)) return "";
    return settings.pascalCase ? toUpperCase(text) : toLowerCase(text);
  }

  // Only strings containing capitals can hide camelCase word boundaries.
  if (text !== toLowerCase(text)) {
    text = preserveCamelCase(
      text,
      toLowerCase,
      toUpperCase,
      settings.preserveConsecutiveUppercase
    );
  }

  text = text.replace(LEADING_SEPARATORS, "");
  text = settings.preserveConsecutiveUppercase
    ? preserveConsecutiveUppercase(text, toLowerCase)
    : toLowerCase(text);

  if (settings.pascalCase) {
    text = toUpperCase(text.charAt(0)) + text.slice(1);
  }

  return postProcess(text, toUpperCase);
}
|
||||
|
||||
module.exports = {
|
||||
camelCase,
|
||||
};
|
@ -10,6 +10,9 @@ function getVectorDbClass() {
|
||||
case "lancedb":
|
||||
const { LanceDb } = require("../vectorDbProviders/lance");
|
||||
return LanceDb;
|
||||
case "weaviate":
|
||||
const { Weaviate } = require("../vectorDbProviders/weaviate");
|
||||
return Weaviate;
|
||||
default:
|
||||
throw new Error("ENV: No VECTOR_DB value found in environment!");
|
||||
}
|
||||
|
@ -39,6 +39,15 @@ const KEY_MAPPING = {
|
||||
envKey: "CHROMA_ENDPOINT",
|
||||
checks: [isValidURL, validChromaURL],
|
||||
},
|
||||
WeaviateEndpoint: {
|
||||
envKey: "WEAVIATE_ENDPOINT",
|
||||
checks: [isValidURL],
|
||||
},
|
||||
WeaviateApiKey: {
|
||||
envKey: "WEAVIATE_API_KEY",
|
||||
checks: [],
|
||||
},
|
||||
|
||||
PineConeEnvironment: {
|
||||
envKey: "PINECONE_ENVIRONMENT",
|
||||
checks: [],
|
||||
@ -103,7 +112,7 @@ function validOpenAIModel(input = "") {
|
||||
}
|
||||
|
||||
function supportedVectorDB(input = "") {
|
||||
const supported = ["chroma", "pinecone", "lancedb"];
|
||||
const supported = ["chroma", "pinecone", "lancedb", "weaviate"];
|
||||
return supported.includes(input)
|
||||
? null
|
||||
: `Invalid VectorDB type. Must be one of ${supported.join(", ")}.`;
|
||||
|
17
server/utils/vectorDbProviders/weaviate/WEAVIATE_SETUP.md
Normal file
17
server/utils/vectorDbProviders/weaviate/WEAVIATE_SETUP.md
Normal file
@ -0,0 +1,17 @@
|
||||
# How to set up a local (or cloud) Weaviate Vector Database
|
||||
|
||||
[Get a Weaviate Cloud instance](https://weaviate.io/developers/weaviate/quickstart#create-an-instance).
|
||||
[Set up Weaviate locally on Docker](https://weaviate.io/developers/weaviate/installation/docker-compose).
|
||||
|
||||
Fill out the variables in the "Vector Database" tab of settings. Select Weaviate as your provider and fill out the appropriate fields
|
||||
with the information from either of the above steps.
|
||||
|
||||
### How to get started _Development mode only_
|
||||
|
||||
After setting up either the Weaviate cloud or local dockerized instance, you just need to set these variables in `.env.development` or define them at runtime via the UI.
|
||||
|
||||
```
|
||||
VECTOR_DB="weaviate"
|
||||
WEAVIATE_ENDPOINT='http://localhost:8080'
|
||||
WEAVIATE_API_KEY= # Optional
|
||||
```
|
503
server/utils/vectorDbProviders/weaviate/index.js
Normal file
503
server/utils/vectorDbProviders/weaviate/index.js
Normal file
@ -0,0 +1,503 @@
|
||||
const { default: weaviate } = require("weaviate-ts-client");
|
||||
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
const { camelCase } = require("../../helpers/camelcase");
|
||||
|
||||
/**
 * Weaviate vector database provider.
 *
 * Workspace namespaces are mapped onto Weaviate classes; every class name is
 * derived with `camelCase(namespace)`. Methods taking a `client` expect the
 * object returned by `connect()`.
 */
const Weaviate = {
  name: "Weaviate",

  /**
   * Creates a Weaviate client from WEAVIATE_ENDPOINT / WEAVIATE_API_KEY and
   * verifies the instance answers the liveness check.
   *
   * @returns {Promise<{client: object}>}
   * @throws {Error} When VECTOR_DB is not "weaviate", the endpoint is not a
   *   valid URL, or the liveness check is falsy.
   */
  connect: async function () {
    if (process.env.VECTOR_DB !== "weaviate")
      throw new Error("Weaviate::Invalid ENV settings");

    const weaviateUrl = new URL(process.env.WEAVIATE_ENDPOINT);
    const options = {
      scheme: weaviateUrl.protocol?.replace(":", "") || "http",
      host: weaviateUrl?.host,
      // The API key is optional; only attach it when one is configured.
      ...(process.env?.WEAVIATE_API_KEY?.length > 0
        ? { apiKey: new weaviate.ApiKey(process.env?.WEAVIATE_API_KEY) }
        : {}),
    };
    const client = weaviate.client(options);
    const isAlive = await client.misc.liveChecker().do();
    if (!isAlive)
      throw new Error(
        "Weaviate::Invalid Alive signal received - is the service online?"
      );
    return { client };
  },

  /**
   * Connects to verify the service is reachable.
   * @returns {Promise<{heartbeat: number}>} Current time in milliseconds.
   */
  heartbeat: async function () {
    await this.connect();
    return { heartbeat: Number(new Date()) };
  },

  /**
   * Sums the object counts of every class in the schema.
   * @returns {Promise<number>}
   */
  totalIndicies: async function () {
    const { client } = await this.connect();
    const collectionNames = await this.allNamespaces(client);
    let totalVectors = 0;
    for (const name of collectionNames) {
      totalVectors += await this.namespaceCountWithClient(client, name);
    }
    return totalVectors;
  },

  /**
   * Counts the objects stored in the class for `namespace` using an existing client.
   * Errors are logged and reported as a count of 0.
   *
   * @param {object} client - Connected Weaviate client.
   * @param {string} namespace
   * @returns {Promise<number>}
   */
  namespaceCountWithClient: async function (client, namespace) {
    try {
      const className = camelCase(namespace);
      const response = await client.graphql
        .aggregate()
        .withClassName(className)
        .withFields("meta { count }")
        .do();
      return response?.data?.Aggregate?.[className]?.[0]?.meta?.count || 0;
    } catch (e) {
      console.error(`Weaviate:namespaceCountWithClient`, e.message);
      return 0;
    }
  },

  /**
   * Same as `namespaceCountWithClient` but opens its own connection.
   * Connection errors are logged and reported as a count of 0.
   *
   * @param {string|null} namespace
   * @returns {Promise<number>}
   */
  namespaceCount: async function (namespace = null) {
    try {
      const { client } = await this.connect();
      return await this.namespaceCountWithClient(client, namespace);
    } catch (e) {
      console.error(`Weaviate:namespaceCount`, e.message);
      return 0;
    }
  },

  /**
   * Runs a nearest-vector search (limit 4) against the namespace class.
   *
   * @param {object} client - Connected Weaviate client.
   * @param {string} namespace
   * @param {number[]} queryVector - Embedding of the user input.
   * @returns {Promise<{contextTexts: string[], sourceDocuments: object[]}>}
   *   Empty lists when the class does not exist or nothing matched.
   */
  similarityResponse: async function (client, namespace, queryVector) {
    const result = {
      contextTexts: [],
      sourceDocuments: [],
    };

    const weaviateClass = await this.namespace(client, namespace);
    // `namespace()` resolves to null for an unknown class - nothing to search.
    if (!weaviateClass) return result;

    const className = camelCase(namespace);
    const fields = (weaviateClass.properties ?? [])
      .map((prop) => prop.name)
      .join(" ");
    const queryResponse = await client.graphql
      .get()
      .withClassName(className)
      .withFields(`${fields} _additional { id }`)
      .withNearVector({ vector: queryVector })
      .withLimit(4)
      .do();

    const responses = queryResponse?.data?.Get?.[className] ?? [];
    for (const response of responses) {
      // In Weaviate we have to pluck id from _additional and spread it into the rest
      // of the properties.
      const { _additional: { id } = {}, ...rest } = response;
      result.contextTexts.push(rest.text);
      result.sourceDocuments.push({ ...rest, id });
    }

    return result;
  },

  /**
   * Lists the class names present in the schema.
   * @param {object} client - Connected Weaviate client.
   * @returns {Promise<string[]>} Empty list when the schema cannot be read.
   */
  allNamespaces: async function (client) {
    try {
      const { classes = [] } = await client.schema.getter().do();
      return classes.map((classObj) => classObj.class);
    } catch (e) {
      console.error("Weaviate::AllNamespace", e);
      return [];
    }
  },

  /**
   * Fetches the class definition for `namespace` plus its object count.
   *
   * @param {object} client - Connected Weaviate client.
   * @param {string|null} namespace
   * @returns {Promise<object|null>} Class definition with `vectorCount`, or
   *   null when the class does not exist.
   * @throws {Error} When no namespace is provided.
   */
  namespace: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    if (!(await this.namespaceExists(client, namespace))) return null;

    const weaviateClass = await client.schema
      .classGetter()
      .withClassName(camelCase(namespace))
      .do();

    return {
      ...weaviateClass,
      // Reuse the client we already hold instead of opening a new connection.
      vectorCount: await this.namespaceCountWithClient(client, namespace),
    };
  },

  /**
   * Sends `vectors` to Weaviate in a single batch request.
   *
   * @param {object} client - Connected Weaviate client.
   * @param {object[]} vectors - Objects of shape { id, class, vector, properties }.
   * @returns {Promise<{success: boolean, errors: Array<string|null>}>}
   *   `success` is false when any object reports errors with a non-SUCCESS
   *   status; `errors` holds the de-duplicated messages.
   */
  addVectors: async function (client, vectors = []) {
    // The batcher rejects a request without objects, so skip the round trip.
    if (vectors.length === 0) return { success: true, errors: [] };

    const messages = new Set();
    let success = true;
    const results = await client.batch
      .objectsBatcher()
      .withObjects(...vectors)
      .do();

    for (const res of results ?? []) {
      const { status, errors } = res?.result ?? {};
      // Per-object errors arrive as `{ error: [{ message }] }`.
      const hasErrors = errors != null && errors.length !== 0;
      if (status === "SUCCESS" || !hasErrors) continue;

      success = false;
      const reported = (errors.error ?? [])
        .map((err) => err?.message)
        .filter(Boolean);
      if (reported.length === 0) messages.add(null);
      for (const message of reported) messages.add(message);
    }

    return { success, errors: [...messages] };
  },

  /**
   * Checks whether a class exists for `namespace`, opening its own connection.
   * @param {string|null} namespace
   * @returns {Promise<boolean>} false when no namespace is given.
   */
  hasNamespace: async function (namespace = null) {
    if (!namespace) return false;
    const { client } = await this.connect();
    const weaviateClasses = await this.allNamespaces(client);
    return weaviateClasses.includes(camelCase(namespace));
  },

  /**
   * Checks whether a class exists for `namespace` using an existing client.
   * @param {object} client - Connected Weaviate client.
   * @param {string|null} namespace
   * @returns {Promise<boolean>}
   * @throws {Error} When no namespace is provided.
   */
  namespaceExists: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const weaviateClasses = await this.allNamespaces(client);
    return weaviateClasses.includes(camelCase(namespace));
  },

  /**
   * Deletes the whole class backing `namespace` (and with it all its objects).
   * @param {object} client - Connected Weaviate client.
   * @param {string|null} namespace
   * @returns {Promise<true>}
   */
  deleteVectorsInNamespace: async function (client, namespace = null) {
    await client.schema.classDeleter().withClassName(camelCase(namespace)).do();
    return true;
  },

  /**
   * Embeds a document (or replays its cached embeddings) into the namespace
   * class and records the created vector ids through `DocumentVectors`.
   *
   * @param {string} namespace
   * @param {object} documentData - Needs `pageContent` and `docId`; remaining
   *   keys (except `id`) are stored as flattened metadata.
   * @param {string|null} fullFilePath - Key used for the vector cache.
   * @returns {Promise<boolean>} true on success; false when there is no
   *   content or any step throws (the error is logged).
   */
  addDocumentToNamespace: async function (
    namespace,
    documentData = {},
    fullFilePath = null
  ) {
    const { DocumentVectors } = require("../../../models/vectors");

    // Creates the class for `namespace` when the schema does not have it yet.
    const createClassIfMissing = async (client) => {
      if (await this.namespaceExists(client, namespace)) return;
      await client.schema
        .classCreator()
        .withClass({
          class: camelCase(namespace),
          description: `Class created by AnythingLLM named ${camelCase(
            namespace
          )}`,
          vectorizer: "none",
        })
        .do();
    };

    try {
      const {
        pageContent,
        docId,
        id: _id, // Weaviate will abort if `id` is present in properties
        ...metadata
      } = documentData;
      if (!pageContent || pageContent.length === 0) return false;

      console.log("Adding new vectorized document into namespace", namespace);
      const cacheResult = await cachedVectorInformation(fullFilePath);
      if (cacheResult.exists) {
        const { client } = await this.connect();
        await createClassIfMissing(client);

        const { chunks } = cacheResult;
        const documentVectors = [];

        for (const chunk of chunks) {
          // Before sending to Weaviate and saving the records to our db
          // we need to assign the id of each chunk that is stored in the cached file.
          // A fresh batch per chunk: earlier chunks must not be sent again.
          const batch = chunk.map((record) => {
            const id = uuidv4();
            documentVectors.push({ docId, vectorId: id });
            return {
              id,
              class: camelCase(namespace),
              vector: record.vector || record.values || [],
              properties: {
                ...this.flattenObjectForWeaviate(record.properties),
              },
            };
          });

          const { success: additionResult, errors = [] } =
            await this.addVectors(client, batch);
          if (!additionResult) {
            console.error("Weaviate::addVectors failed to insert", errors);
            throw new Error("Error embedding into Weaviate");
          }
        }

        await DocumentVectors.bulkInsert(documentVectors);
        return true;
      }

      // If we are here then we are going to embed and store a novel document.
      // We have to do this manually as opposed to using LangChains `Chroma.fromDocuments`
      // because we then cannot atomically control our namespace to granularly find/remove documents
      // from vectordb.
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 1000,
        chunkOverlap: 20,
      });
      const textChunks = await textSplitter.splitText(pageContent);

      console.log("Chunks created from document:", textChunks.length);
      const LLMConnector = getLLMProvider();
      const documentVectors = [];
      const vectors = [];
      const vectorValues = await LLMConnector.embedChunks(textChunks);

      if (!!vectorValues && vectorValues.length > 0) {
        // The metadata is identical for every chunk, so flatten it once.
        const flattenedMetadata = this.flattenObjectForWeaviate(metadata);
        for (const [i, vector] of vectorValues.entries()) {
          const vectorRecord = {
            class: camelCase(namespace),
            id: uuidv4(),
            vector: vector,
            // [DO NOT REMOVE]
            // LangChain will be unable to find your text if you embed manually and dont include the `text` key.
            // https://github.com/hwchase17/langchainjs/blob/5485c4af50c063e257ad54f4393fa79e0aff6462/langchain/src/vectorstores/weaviate.ts#L133
            properties: { ...flattenedMetadata, text: textChunks[i] },
          };

          vectors.push(vectorRecord);
          documentVectors.push({ docId, vectorId: vectorRecord.id });
        }
      } else {
        console.error(
          "Could not use OpenAI to embed document chunks! This document will not be recorded."
        );
      }

      const { client } = await this.connect();
      await createClassIfMissing(client);

      if (vectors.length > 0) {
        // Chunked copy of the records; this is what gets written to the cache.
        const chunks = [];
        for (const chunk of toChunks(vectors, 500)) chunks.push(chunk);

        console.log("Inserting vectorized chunks into Weaviate collection.");
        const { success: additionResult, errors = [] } = await this.addVectors(
          client,
          vectors
        );
        if (!additionResult) {
          console.error("Weaviate::addVectors failed to insert", errors);
          throw new Error("Error embedding into Weaviate");
        }
        await storeVectorResult(chunks, fullFilePath);
      }

      await DocumentVectors.bulkInsert(documentVectors);
      return true;
    } catch (e) {
      console.error(e);
      console.error("addDocumentToNamespace", e.message);
      return false;
    }
  },

  /**
   * Removes every vector recorded for `docId` from the namespace class and
   * deletes the matching `DocumentVectors` rows.
   *
   * @param {string} namespace
   * @param {string} docId
   * @returns {Promise<true|undefined>} undefined when the class or the
   *   document records do not exist.
   */
  deleteDocumentFromNamespace: async function (namespace, docId) {
    const { DocumentVectors } = require("../../../models/vectors");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) return;

    // NOTE(review): `docId` is interpolated straight into the where-clause
    // string. Confirm callers never pass untrusted values, or switch to a
    // parameterized lookup if DocumentVectors offers one.
    const knownDocuments = await DocumentVectors.where(`docId = '${docId}'`);
    if (knownDocuments.length === 0) return;

    for (const doc of knownDocuments) {
      await client.data
        .deleter()
        .withClassName(camelCase(namespace))
        .withId(doc.vectorId)
        .do();
    }

    const indexes = knownDocuments.map((doc) => doc.id);
    await DocumentVectors.deleteIds(indexes);
    return true;
  },

  /**
   * Answers a single question using the most similar stored chunks as context.
   *
   * @param {object} reqBody - { namespace, input, workspace }.
   * @returns {Promise<{response: string|null, sources: object[], message: string|false}>}
   * @throws {Error} When `namespace` or `input` is missing.
   */
  query: async function (reqBody = {}) {
    const { namespace = null, input, workspace = {} } = reqBody;
    if (!namespace || !input) throw new Error("Invalid request body");

    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) {
      return {
        response: null,
        sources: [],
        message: "Invalid query - no documents found for workspace!",
      };
    }

    const LLMConnector = getLLMProvider();
    const queryVector = await LLMConnector.embedTextInput(input);
    const { contextTexts, sourceDocuments } = await this.similarityResponse(
      client,
      namespace,
      queryVector
    );

    const contextBlock = contextTexts
      .map((text, i) => {
        return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
      })
      .join("");
    const prompt = {
      role: "system",
      content: `${chatPrompt(workspace)}\nContext:\n${contextBlock}`,
    };
    const memory = [prompt, { role: "user", content: input }];
    const responseText = await LLMConnector.getChatCompletion(memory, {
      temperature: workspace?.openAiTemp ?? 0.7,
    });

    return {
      response: responseText,
      sources: this.curateSources(sourceDocuments),
      message: false,
    };
  },

  // This implementation of chat uses the chat history and modifies the system prompt at execution
  // this is improved over the regular langchain implementation so that chats do not directly modify embeddings
  // because then multi-user support will have all conversations mutating the base vector collection to which then
  // the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
  /**
   * Same as `query`, but places `chatHistory` between the system prompt and
   * the new user message.
   *
   * @param {object} reqBody - { namespace, input, workspace, chatHistory }.
   * @returns {Promise<{response: string|null, sources: object[], message: string|false}>}
   * @throws {Error} When `namespace` or `input` is missing.
   */
  chat: async function (reqBody = {}) {
    const {
      namespace = null,
      input,
      workspace = {},
      chatHistory = [],
    } = reqBody;
    if (!namespace || !input) throw new Error("Invalid request body");

    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) {
      return {
        response: null,
        sources: [],
        message: "Invalid query - no documents found for workspace!",
      };
    }

    const LLMConnector = getLLMProvider();
    const queryVector = await LLMConnector.embedTextInput(input);
    const { contextTexts, sourceDocuments } = await this.similarityResponse(
      client,
      namespace,
      queryVector
    );

    const contextBlock = contextTexts
      .map((text, i) => {
        return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
      })
      .join("");
    const prompt = {
      role: "system",
      content: `${chatPrompt(workspace)}\nContext:\n${contextBlock}`,
    };
    const memory = [prompt, ...chatHistory, { role: "user", content: input }];
    const responseText = await LLMConnector.getChatCompletion(memory, {
      temperature: workspace?.openAiTemp ?? 0.7,
    });

    return {
      response: responseText,
      sources: this.curateSources(sourceDocuments),
      message: false,
    };
  },

  /**
   * Returns the class definition and vector count for a namespace.
   * @param {object} reqBody - { namespace }.
   * @returns {Promise<object>} Stats, or a `{ message }` object when none exist.
   * @throws {Error} When `namespace` is missing.
   */
  "namespace-stats": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    if (!namespace) throw new Error("namespace required");
    const { client } = await this.connect();
    const stats = await this.namespace(client, namespace);
    return stats
      ? stats
      : { message: "No stats were able to be fetched from DB for namespace" };
  },

  /**
   * Deletes the class backing a namespace and reports how many vectors it held.
   * @param {object} reqBody - { namespace }.
   * @returns {Promise<{message: string}>}
   */
  "delete-namespace": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    const { client } = await this.connect();
    const details = await this.namespace(client, namespace);
    await this.deleteVectorsInNamespace(client, namespace);
    return {
      // Fall back to 0 so the message never reads "undefined vectors".
      message: `Namespace ${camelCase(namespace)} was deleted along with ${
        details?.vectorCount ?? 0
      } vectors.`,
    };
  },

  /**
   * Deletes every class in the schema.
   * @returns {Promise<{reset: true}>}
   */
  reset: async function () {
    const { client } = await this.connect();
    const weaviateClasses = await this.allNamespaces(client);
    for (const weaviateClass of weaviateClasses) {
      await client.schema.classDeleter().withClassName(weaviateClass).do();
    }
    return { reset: true };
  },

  /**
   * Drops empty objects from a list of source documents.
   * @param {object[]} sources
   * @returns {object[]}
   */
  curateSources: function (sources = []) {
    return sources.filter((source) => Object.keys(source).length > 0);
  },

  /**
   * Flattens nested metadata into a single level, joining nested keys with "_".
   * `null` values, empty arrays, arrays of objects and arrays of mixed types
   * are omitted from the result.
   *
   * @param {object} obj
   * @returns {object} Flat copy of `obj`.
   */
  flattenObjectForWeaviate: function (obj = {}) {
    // Note this function is not generic, it is designed specifically for Weaviate
    // https://weaviate.io/developers/weaviate/config-refs/datatypes#introduction
    // Credit to LangchainJS
    // https://github.com/hwchase17/langchainjs/blob/5485c4af50c063e257ad54f4393fa79e0aff6462/langchain/src/vectorstores/weaviate.ts#L11C1-L50C3
    const flattenedObject = {};

    for (const key in obj) {
      if (!Object.hasOwn(obj, key)) {
        continue;
      }
      const value = obj[key];

      // `typeof null` is "object"; there is nothing to flatten, so skip it.
      if (value === null) continue;

      if (Array.isArray(value)) {
        // Weaviate only supports arrays of primitive types,
        // where all elements are of the same type
        if (
          value.length > 0 &&
          typeof value[0] !== "object" &&
          value.every((el) => typeof el === typeof value[0])
        ) {
          flattenedObject[key] = value;
        }
      } else if (typeof value === "object") {
        const recursiveResult = this.flattenObjectForWeaviate(value);
        for (const deepKey in recursiveResult) {
          if (Object.hasOwn(recursiveResult, deepKey)) {
            flattenedObject[`${key}_${deepKey}`] = recursiveResult[deepKey];
          }
        }
      } else {
        flattenedObject[key] = value;
      }
    }

    return flattenedObject;
  },
};
|
||||
|
||||
// Expose the Weaviate object as a named CommonJS export.
module.exports.Weaviate = Weaviate;
|
@ -130,6 +130,11 @@
|
||||
dependencies:
|
||||
googleapis-common "^6.0.3"
|
||||
|
||||
"@graphql-typed-document-node/core@^3.1.1":
|
||||
version "3.2.0"
|
||||
resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.2.0.tgz#5f3d96ec6b2354ad6d8a28bf216a1d97b5426861"
|
||||
integrity sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==
|
||||
|
||||
"@mapbox/node-pre-gyp@^1.0.0", "@mapbox/node-pre-gyp@^1.0.10":
|
||||
version "1.0.11"
|
||||
resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz#417db42b7f5323d79e93b34a6d7a2a12c0df43fa"
|
||||
@ -916,6 +921,11 @@ extend@^3.0.2:
|
||||
resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
|
||||
integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
|
||||
|
||||
extract-files@^9.0.0:
|
||||
version "9.0.0"
|
||||
resolved "https://registry.yarnpkg.com/extract-files/-/extract-files-9.0.0.tgz#8a7744f2437f81f5ed3250ed9f1550de902fe54a"
|
||||
integrity sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ==
|
||||
|
||||
extract-zip@^2.0.1:
|
||||
version "2.0.1"
|
||||
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
|
||||
@ -981,6 +991,15 @@ follow-redirects@^1.14.8:
|
||||
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.2.tgz#b460864144ba63f2681096f274c4e57026da2c13"
|
||||
integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==
|
||||
|
||||
form-data@^3.0.0:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f"
|
||||
integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==
|
||||
dependencies:
|
||||
asynckit "^0.4.0"
|
||||
combined-stream "^1.0.8"
|
||||
mime-types "^2.1.12"
|
||||
|
||||
form-data@^4.0.0:
|
||||
version "4.0.0"
|
||||
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
|
||||
@ -1149,6 +1168,21 @@ graceful-fs@^4.2.0, graceful-fs@^4.2.6:
|
||||
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3"
|
||||
integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==
|
||||
|
||||
graphql-request@^5.1.0:
|
||||
version "5.2.0"
|
||||
resolved "https://registry.yarnpkg.com/graphql-request/-/graphql-request-5.2.0.tgz#a05fb54a517d91bb2d7aefa17ade4523dc5ebdca"
|
||||
integrity sha512-pLhKIvnMyBERL0dtFI3medKqWOz/RhHdcgbZ+hMMIb32mEPa5MJSzS4AuXxfI4sRAu6JVVk5tvXuGfCWl9JYWQ==
|
||||
dependencies:
|
||||
"@graphql-typed-document-node/core" "^3.1.1"
|
||||
cross-fetch "^3.1.5"
|
||||
extract-files "^9.0.0"
|
||||
form-data "^3.0.0"
|
||||
|
||||
graphql@^16.7.1:
|
||||
version "16.7.1"
|
||||
resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.7.1.tgz#11475b74a7bff2aefd4691df52a0eca0abd9b642"
|
||||
integrity sha512-DRYR9tf+UGU0KOsMcKAlXeFfX89UiiIZ0dRU3mR0yJfu6OjZqUcp68NnFLnqQU5RexygFoDy1EW+ccOYcPfmHg==
|
||||
|
||||
gtoken@^6.1.0:
|
||||
version "6.1.2"
|
||||
resolved "https://registry.yarnpkg.com/gtoken/-/gtoken-6.1.2.tgz#aeb7bdb019ff4c3ba3ac100bbe7b6e74dce0e8bc"
|
||||
@ -2507,6 +2541,15 @@ vectordb@0.1.12:
|
||||
"@apache-arrow/ts" "^12.0.0"
|
||||
apache-arrow "^12.0.0"
|
||||
|
||||
weaviate-ts-client@^1.4.0:
|
||||
version "1.4.0"
|
||||
resolved "https://registry.yarnpkg.com/weaviate-ts-client/-/weaviate-ts-client-1.4.0.tgz#e1adb670f2c1930a82601efb915b0131f6988b7e"
|
||||
integrity sha512-G2V/IWMHXDjoJeATUYKkZXzAs7iRj4GE8B3AX59XDqMRW12X7VUkRgo4xWcHH1bjpLIHUYTzD5qZXcB8P9Hdmw==
|
||||
dependencies:
|
||||
graphql-request "^5.1.0"
|
||||
isomorphic-fetch "^3.0.0"
|
||||
uuid "^9.0.0"
|
||||
|
||||
webidl-conversions@^3.0.0:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871"
|
||||
|
Loading…
Reference in New Issue
Block a user