diff --git a/server/utils/EmbeddingEngines/azureOpenAi/index.js b/server/utils/EmbeddingEngines/azureOpenAi/index.js
index 3f36b576..e80b4b73 100644
--- a/server/utils/EmbeddingEngines/azureOpenAi/index.js
+++ b/server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -16,7 +16,8 @@ class AzureOpenAiEmbedder {
 
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
     // https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
-    this.embeddingMaxChunkLength = 16;
+    this.maxConcurrentChunks = 16;
+    this.embeddingMaxChunkLength = 1_000;
   }
 
   async embedTextInput(textInput) {
@@ -34,9 +35,9 @@
 
     // Because there is a limit on how many chunks can be sent at once to Azure OpenAI
     // we concurrently execute each max batch of text chunks possible.
-    // Refer to constructor embeddingMaxChunkLength for more info.
+    // Refer to constructor maxConcurrentChunks for more info.
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
diff --git a/server/utils/EmbeddingEngines/localAi/index.js b/server/utils/EmbeddingEngines/localAi/index.js
index 68fe6654..1480755d 100644
--- a/server/utils/EmbeddingEngines/localAi/index.js
+++ b/server/utils/EmbeddingEngines/localAi/index.js
@@ -18,6 +18,7 @@ class LocalAiEmbedder {
     this.openai = new OpenAIApi(config);
 
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
+    this.maxConcurrentChunks = 50;
     this.embeddingMaxChunkLength = maximumChunkLength();
   }
 
@@ -28,7 +29,7 @@
 
   async embedChunks(textChunks = []) {
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
diff --git a/server/utils/EmbeddingEngines/native/index.js b/server/utils/EmbeddingEngines/native/index.js
index 81189d4e..69e13a9e 100644
--- a/server/utils/EmbeddingEngines/native/index.js
+++ b/server/utils/EmbeddingEngines/native/index.js
@@ -14,7 +14,8 @@ class NativeEmbedder {
     this.modelPath = path.resolve(this.cacheDir, "Xenova", "all-MiniLM-L6-v2");
 
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
-    this.embeddingMaxChunkLength = 50;
+    this.maxConcurrentChunks = 50;
+    this.embeddingMaxChunkLength = 1_000;
 
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
@@ -63,7 +64,7 @@
   async embedChunks(textChunks = []) {
     const Embedder = await this.embedderClient();
     const embeddingResults = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
      const output = await Embedder(chunk, {
        pooling: "mean",
        normalize: true,
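Each embedder in this patch funnels batching through the same `toChunks` helper. Its implementation is not part of this diff; a minimal sketch of the behavior the call sites above assume (split an array into consecutive batches of at most `size` elements, preserving order) might look like:

```js
// Sketch only; the project's actual helper may differ in details.
// Splits `arr` into consecutive batches of at most `size` elements.
function toChunks(arr, size) {
  return Array.from({ length: Math.ceil(arr.length / size) }, (_, i) =>
    arr.slice(i * size, i * size + size)
  );
}

// toChunks(["a", "b", "c"], 2) => [["a", "b"], ["c"]]
```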
diff --git a/server/utils/EmbeddingEngines/openAi/index.js b/server/utils/EmbeddingEngines/openAi/index.js
index 6ba38c89..105be9d7 100644
--- a/server/utils/EmbeddingEngines/openAi/index.js
+++ b/server/utils/EmbeddingEngines/openAi/index.js
@@ -11,6 +11,7 @@ class OpenAiEmbedder {
     this.openai = openai;
 
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
+    this.maxConcurrentChunks = 500;
     this.embeddingMaxChunkLength = 1_000;
   }
 
@@ -22,9 +23,9 @@
   async embedChunks(textChunks = []) {
     // Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
     // we concurrently execute each max batch of text chunks possible.
-    // Refer to constructor embeddingMaxChunkLength for more info.
+    // Refer to constructor maxConcurrentChunks for more info.
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
      embeddingRequests.push(
        new Promise((resolve) => {
          this.openai
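The net effect of the rename is that two previously conflated limits are now separate: `embeddingMaxChunkLength` caps the size of an individual text chunk, while `maxConcurrentChunks` caps how many chunks are packed into a single embeddings request. A hedged sketch of the request pattern every `embedChunks` above now follows, where `embedBatch` is a hypothetical stand-in for the provider-specific API call that each hunk truncates:

```js
// Illustration of the batching pattern, not the exact implementation.
// `embedBatch(batch)` is a hypothetical single embeddings API call that
// accepts up to `maxConcurrentChunks` strings and resolves to one
// vector per input string.
async function embedChunks(textChunks, maxConcurrentChunks, embedBatch) {
  // Dispatch one request per batch; all batches run concurrently.
  const requests = toChunks(textChunks, maxConcurrentChunks).map((batch) =>
    embedBatch(batch)
  );
  // Flatten [[v1, v2], [v3]] back to [v1, v2, v3], preserving input order.
  return (await Promise.all(requests)).flat();
}
```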