Ollama sequential embedding (#2230)

* ollama: Switch from parallel to sequential chunk embedding

* throw error on empty embeddings

---------

Co-authored-by: John Blomberg <john.jb.blomberg@gmail.com>
Authored by Timothy Carambat on 2024-09-06 10:06:46 -07:00; committed by GitHub.
parent fef01550df
commit 20135835d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -36,67 +36,48 @@ class OllamaEmbedder {
return result?.[0] || []; return result?.[0] || [];
} }
/**
* This function takes an array of text chunks and embeds them using the Ollama API.
* chunks are processed sequentially to avoid overwhelming the API with too many requests
* or running out of resources on the endpoint running the ollama instance.
* @param {string[]} textChunks - An array of text chunks to embed.
* @returns {Promise<Array<number[]>>} - A promise that resolves to an array of embeddings.
*/
async embedChunks(textChunks = []) { async embedChunks(textChunks = []) {
if (!(await this.#isAlive())) if (!(await this.#isAlive()))
throw new Error( throw new Error(
`Ollama service could not be reached. Is Ollama running?` `Ollama service could not be reached. Is Ollama running?`
); );
const embeddingRequests = [];
this.log( this.log(
`Embedding ${textChunks.length} chunks of text with ${this.model}.` `Embedding ${textChunks.length} chunks of text with ${this.model}.`
); );
let data = [];
let error = null;
for (const chunk of textChunks) { for (const chunk of textChunks) {
embeddingRequests.push( try {
new Promise((resolve) => { const res = await fetch(this.basePath, {
fetch(this.basePath, { method: "POST",
method: "POST", body: JSON.stringify({
body: JSON.stringify({ model: this.model,
model: this.model, prompt: chunk,
prompt: chunk, }),
}), });
})
.then((res) => res.json())
.then(({ embedding }) => {
resolve({ data: embedding, error: null });
return;
})
.catch((error) => {
resolve({ data: [], error: error.message });
return;
});
})
);
}
const { data = [], error = null } = await Promise.all( const { embedding } = await res.json();
embeddingRequests if (!Array.isArray(embedding) || embedding.length === 0)
).then((results) => { throw new Error("Ollama returned an empty embedding for chunk!");
// If any errors were returned from Ollama abort the entire sequence because the embeddings
// will be incomplete.
const errors = results data.push(embedding);
.filter((res) => !!res.error) } catch (err) {
.map((res) => res.error) this.log(err.message);
.flat(); error = err.message;
if (errors.length > 0) { data = [];
let uniqueErrors = new Set(); break;
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return {
data: [],
error: Array.from(uniqueErrors).join(", "),
};
} }
}
return {
data: results.map((res) => res?.data || []),
error: null,
};
});
if (!!error) throw new Error(`Ollama Failed to embed: ${error}`); if (!!error) throw new Error(`Ollama Failed to embed: ${error}`);
return data.length > 0 ? data : null; return data.length > 0 ? data : null;