2023-06-08 06:31:35 +02:00
|
|
|
// Resolve the vector database client class selected via the VECTOR_DB env
// var (defaults to "pinecone"). Providers are required lazily so only the
// chosen provider's module — and its dependencies — is ever loaded.
// Returns the provider class itself (not an instance); throws if VECTOR_DB
// holds an unsupported value.
function getVectorDbClass() {
  const loaders = {
    pinecone: () => require("../vectorDbProviders/pinecone").Pinecone,
    chroma: () => require("../vectorDbProviders/chroma").Chroma,
    lancedb: () => require("../vectorDbProviders/lance").LanceDb,
    weaviate: () => require("../vectorDbProviders/weaviate").Weaviate,
    qdrant: () => require("../vectorDbProviders/qdrant").QDrant,
    milvus: () => require("../vectorDbProviders/milvus").Milvus,
    zilliz: () => require("../vectorDbProviders/zilliz").Zilliz,
  };

  const vectorSelection = process.env.VECTOR_DB || "pinecone";
  const load = loaders[vectorSelection];
  if (!load) throw new Error("ENV: No VECTOR_DB value found in environment!");
  return load();
}
|
|
|
|
|
2024-01-17 21:59:25 +01:00
|
|
|
// Construct the chat LLM provider selected via the LLM_PROVIDER env var
// (defaults to "openai"). Each provider module is required lazily so only
// the selected one is loaded.
// @param {string|null} modelPreference - optional model name forwarded to
//   the provider constructor to override its default model.
// @returns a new provider instance wired to the configured embedding engine.
// @throws {Error} when LLM_PROVIDER holds an unsupported value.
function getLLMProvider(modelPreference = null) {
  const loaders = {
    openai: () => require("../AiProviders/openAi").OpenAiLLM,
    azure: () => require("../AiProviders/azureOpenAi").AzureOpenAiLLM,
    anthropic: () => require("../AiProviders/anthropic").AnthropicLLM,
    gemini: () => require("../AiProviders/gemini").GeminiLLM,
    lmstudio: () => require("../AiProviders/lmStudio").LMStudioLLM,
    localai: () => require("../AiProviders/localAi").LocalAiLLM,
    ollama: () => require("../AiProviders/ollama").OllamaAILLM,
    togetherai: () => require("../AiProviders/togetherAi").TogetherAiLLM,
    mistral: () => require("../AiProviders/mistral").MistralLLM,
    native: () => require("../AiProviders/native").NativeLLM,
  };

  const vectorSelection = process.env.LLM_PROVIDER || "openai";
  // Embedder is resolved before validating the provider, matching the
  // original evaluation order (its side effects fire even on the throw path).
  const embedder = getEmbeddingEngineSelection();

  const load = loaders[vectorSelection];
  if (!load)
    throw new Error("ENV: No LLM_PROVIDER value found in environment!");
  const LLMClass = load();
  return new LLMClass(embedder, modelPreference);
}
|
|
|
|
|
2023-10-30 23:44:03 +01:00
|
|
|
// Instantiate the embedding engine selected via the EMBEDDING_ENGINE env
// var. Unlike the LLM/vector-DB selectors there is no default: an unset or
// unrecognized value yields null, signalling the caller to fall back to the
// LLM provider's own embedding support.
function getEmbeddingEngineSelection() {
  const engineSelection = process.env.EMBEDDING_ENGINE;

  if (engineSelection === "openai") {
    const { OpenAiEmbedder } = require("../EmbeddingEngines/openAi");
    return new OpenAiEmbedder();
  }

  if (engineSelection === "azure") {
    const {
      AzureOpenAiEmbedder,
    } = require("../EmbeddingEngines/azureOpenAi");
    return new AzureOpenAiEmbedder();
  }

  if (engineSelection === "localai") {
    const { LocalAiEmbedder } = require("../EmbeddingEngines/localAi");
    return new LocalAiEmbedder();
  }

  if (engineSelection === "native") {
    const { NativeEmbedder } = require("../EmbeddingEngines/native");
    console.log("\x1b[34m[INFO]\x1b[0m Using Native Embedder");
    return new NativeEmbedder();
  }

  return null;
}
|
|
|
|
|
2023-12-08 01:27:36 +01:00
|
|
|
// Some embedding models can only encode a limited number of characters per
// pass. We assume 1,000 chars by default, but EMBEDDING_MODEL_MAX_CHUNK_LENGTH
// lets deployments override that ceiling for models with tighter limits.
// Returns the override as a number when it is set, numeric, and greater
// than 1; otherwise returns the 1,000-char default.
function maximumChunkLength() {
  const raw = process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH;
  const limit = Number(raw);
  // Guard order mirrors the original checks: set, numeric, and > 1.
  if (!!raw && !Number.isNaN(limit) && limit > 1) return limit;

  return 1_000;
}
|
|
|
|
|
2023-06-08 22:13:48 +02:00
|
|
|
// Split an array into consecutive chunks of at most `size` elements.
// The final chunk holds the remainder when the length is not an exact
// multiple of `size`; an empty input yields an empty array.
function toChunks(arr, size) {
  const chunks = [];
  for (let start = 0; start < arr.length; start += size) {
    chunks.push(arr.slice(start, start + size));
  }
  return chunks;
}
|
|
|
|
|
2023-06-08 06:31:35 +02:00
|
|
|
// Shared helpers consumed across the server: provider selection for the
// vector DB, LLM, and embedding engine, plus chunking utilities.
module.exports = {
  getEmbeddingEngineSelection,
  maximumChunkLength,
  getVectorDbClass,
  getLLMProvider,
  toChunks,
};
|