2023-06-08 06:31:35 +02:00
|
|
|
/**
 * Resolves the vector database provider class selected by the `VECTOR_DB`
 * environment variable.
 *
 * When `VECTOR_DB` is unset or empty, the selection falls back to "pinecone".
 * Only the module of the selected provider is loaded; the other provider
 * modules are never required by this call.
 *
 * @returns {*} The `Pinecone`, `Chroma`, or `LanceDb` export of the matching
 *   provider module.
 * @throws {Error} When `VECTOR_DB` is set to anything other than "pinecone",
 *   "chroma", or "lancedb".
 */
function getVectorDbClass() {
  const vectorSelection = process.env.VECTOR_DB || "pinecone";

  switch (vectorSelection) {
    case "pinecone":
      return require("../pinecone").Pinecone;
    case "chroma":
      return require("../chroma").Chroma;
    case "lancedb":
      return require("../lancedb").LanceDb;
    default:
      // Because of the "pinecone" fallback above, this branch is only reached
      // when VECTOR_DB is set to a value we do not recognize, so report it.
      throw new Error(
        `ENV: Unsupported VECTOR_DB value "${vectorSelection}" found in environment!`
      );
  }
}
|
|
|
|
|
2023-06-08 22:13:48 +02:00
|
|
|
/**
 * Splits a sliceable sequence into consecutive pieces of at most `size` items.
 *
 * The final piece holds whatever remains and may be shorter than `size`.
 * The input is not modified; each piece is produced with `arr.slice`.
 *
 * @param {Array|string} arr - Sequence to split (anything with `length` and `slice`).
 * @param {number} size - Maximum number of items per piece.
 * @returns {Array} The pieces, in their original order.
 */
function toChunks(arr, size) {
  const chunkCount = Math.ceil(arr.length / size);

  const takeChunk = (_unused, chunkIndex) => {
    const start = chunkIndex * size;
    return arr.slice(start, start + size);
  };

  return Array.from({ length: chunkCount }, takeChunk);
}
|
|
|
|
|
|
|
|
/**
 * Builds a list of document metadata from `sources`, de-duplicated by
 * `metadata.title`.
 *
 * For each source with non-empty metadata, a shallow copy of that metadata is
 * kept the first time its title is seen; later sources with the same title
 * are dropped. Sources that are nullish, or whose metadata is missing, nullish
 * or empty, are skipped.
 *
 * @param {Array<Object>} [sources=[]] - Items that may carry a `metadata` object.
 * @returns {Array<Object>} Shallow copies of the retained metadata objects, in
 *   first-seen order.
 */
function curateSources(sources = []) {
  // A Set gives constant-time title lookups while keeping the same
  // SameValueZero matching that Array.prototype.includes uses.
  const seenTitles = new Set();
  const documents = [];

  for (const source of sources) {
    // Tolerate nullish entries and nullish metadata rather than throwing.
    const metadata = (source && source.metadata) || {};

    if (Object.keys(metadata).length === 0) continue;
    if (seenTitles.has(metadata.title)) continue;

    documents.push({ ...metadata });
    seenTitles.add(metadata.title);
  }

  return documents;
}
|
|
|
|
|
2023-06-08 06:31:35 +02:00
|
|
|
// Helpers exposed by this module.
module.exports = { getVectorDbClass, toChunks, curateSources };
|