diff --git a/server/utils/AiProviders/azureOpenAi/index.js b/server/utils/AiProviders/azureOpenAi/index.js index 8743cc8a..abb459fb 100644 --- a/server/utils/AiProviders/azureOpenAi/index.js +++ b/server/utils/AiProviders/azureOpenAi/index.js @@ -1,3 +1,5 @@ +const { toChunks } = require("../../helpers"); + class AzureOpenAi { constructor() { const { OpenAIClient, AzureKeyCredential } = require("@azure/openai"); @@ -6,6 +8,10 @@ class AzureOpenAi { new AzureKeyCredential(process.env.AZURE_OPENAI_KEY) ); this.openai = openai; + + // The maximum amount of "inputs" that OpenAI API can process in a single call. + // https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request + this.embeddingChunkLimit = 16; } isValidChatModel(_modelName = "") { @@ -83,10 +89,49 @@ class AzureOpenAi { "No EMBEDDING_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an embedding model." ); - const { data = [] } = await this.openai.getEmbeddings( - textEmbeddingModel, - textChunks - ); + // Because there is a limit on how many chunks can be sent at once to Azure OpenAI + // we concurrently execute each max batch of text chunks possible. + // Refer to constructor embeddingChunkLimit for more info. + const embeddingRequests = []; + for (const chunk of toChunks(textChunks, this.embeddingChunkLimit)) { + embeddingRequests.push( + new Promise((resolve) => { + this.openai + .getEmbeddings(textEmbeddingModel, chunk) + .then((res) => { + resolve({ data: res.data, error: null }); + }) + .catch((e) => { + resolve({ data: [], error: e?.error }); + }); + }) + ); + } + + const { data = [], error = null } = await Promise.all( + embeddingRequests + ).then((results) => { + // If any errors were returned from Azure abort the entire sequence because the embeddings + // will be incomplete. + const errors = results + .filter((res) => !!res.error) + .map((res) => res.error) + .flat(); + if (errors.length > 0) { + return { + data: [], + error: `(${errors.length}) Embedding Errors! ${errors + .map((error) => `[${error.type}]: ${error.message}`) + .join(", ")}`, + }; + } + return { + data: results.map((res) => res?.data || []).flat(), + error: null, + }; + }); + + if (!!error) throw new Error(`Azure OpenAI Failed to embed: ${error}`); return data.length > 0 && data.every((embd) => embd.hasOwnProperty("embedding")) ? data.map((embd) => embd.embedding) diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index 8d7ae7c6..1b523b24 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -186,10 +186,12 @@ const Pinecone = { if (knownDocuments.length === 0) return; const vectorIds = knownDocuments.map((doc) => doc.vectorId); - await pineconeIndex.delete1({ - ids: vectorIds, - namespace, - }); + for (const batchOfVectorIds of toChunks(vectorIds, 1000)) { + await pineconeIndex.delete1({ + ids: batchOfVectorIds, + namespace, + }); + } const indexes = knownDocuments.map((doc) => doc.id); await DocumentVectors.deleteIds(indexes);