anything-llm/server/utils/EmbeddingEngines/voyageAi/index.js
Sean Hatfield 5bf4b4db58
[FEAT] Add support for Voyage AI embedder (#1401)
* add support for voyageai embedder

* remove unneeded import

* linting

* Add ENV examples
Update how chunks are processed for Voyage
use correct langchain import
Add data handling

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2024-05-19 13:20:23 -05:00

46 lines
1.4 KiB
JavaScript

class VoyageAiEmbedder {
constructor() {
if (!process.env.VOYAGEAI_API_KEY)
throw new Error("No Voyage AI API key was set.");
const {
VoyageEmbeddings,
} = require("@langchain/community/embeddings/voyage");
const voyage = new VoyageEmbeddings({
apiKey: process.env.VOYAGEAI_API_KEY,
});
this.voyage = voyage;
this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct";
// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches
this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding
}
async embedTextInput(textInput) {
const result = await this.voyage.embedDocuments(
Array.isArray(textInput) ? textInput : [textInput],
{ modelName: this.model }
);
return result || [];
}
async embedChunks(textChunks = []) {
try {
const embeddings = await this.voyage.embedDocuments(textChunks, {
modelName: this.model,
batchSize: this.batchSize,
});
return embeddings;
} catch (error) {
console.error("Voyage AI Failed to embed:", error);
throw error;
}
}
}
module.exports = {
VoyageAiEmbedder,
};