2024-05-19 20:20:23 +02:00
|
|
|
class VoyageAiEmbedder {
|
|
|
|
constructor() {
|
|
|
|
if (!process.env.VOYAGEAI_API_KEY)
|
|
|
|
throw new Error("No Voyage AI API key was set.");
|
|
|
|
|
|
|
|
const {
|
|
|
|
VoyageEmbeddings,
|
|
|
|
} = require("@langchain/community/embeddings/voyage");
|
|
|
|
const voyage = new VoyageEmbeddings({
|
|
|
|
apiKey: process.env.VOYAGEAI_API_KEY,
|
|
|
|
});
|
|
|
|
|
|
|
|
this.voyage = voyage;
|
|
|
|
this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct";
|
|
|
|
|
|
|
|
// Limit of how many strings we can process in a single pass to stay with resource or network limits
|
|
|
|
this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches
|
2024-05-22 17:53:49 +02:00
|
|
|
this.embeddingMaxChunkLength = this.#getMaxEmbeddingLength();
|
|
|
|
}
|
|
|
|
|
|
|
|
// https://docs.voyageai.com/docs/embeddings
|
|
|
|
#getMaxEmbeddingLength() {
|
|
|
|
switch (this.model) {
|
|
|
|
case "voyage-large-2-instruct":
|
|
|
|
case "voyage-law-2":
|
|
|
|
case "voyage-code-2":
|
|
|
|
case "voyage-large-2":
|
|
|
|
return 16_000;
|
|
|
|
case "voyage-2":
|
|
|
|
return 4_000;
|
|
|
|
default:
|
|
|
|
return 4_000;
|
|
|
|
}
|
2024-05-19 20:20:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
async embedTextInput(textInput) {
|
|
|
|
const result = await this.voyage.embedDocuments(
|
|
|
|
Array.isArray(textInput) ? textInput : [textInput],
|
|
|
|
{ modelName: this.model }
|
|
|
|
);
|
2024-05-22 20:42:48 +02:00
|
|
|
|
|
|
|
// If given an array return the native Array[Array] format since that should be the outcome.
|
|
|
|
// But if given a single string, we need to flatten it so that we have a 1D array.
|
|
|
|
return (Array.isArray(textInput) ? result : result.flat()) || [];
|
2024-05-19 20:20:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
async embedChunks(textChunks = []) {
|
|
|
|
try {
|
|
|
|
const embeddings = await this.voyage.embedDocuments(textChunks, {
|
|
|
|
modelName: this.model,
|
|
|
|
batchSize: this.batchSize,
|
|
|
|
});
|
|
|
|
return embeddings;
|
|
|
|
} catch (error) {
|
|
|
|
console.error("Voyage AI Failed to embed:", error);
|
2024-05-22 20:42:48 +02:00
|
|
|
if (
|
|
|
|
error.message.includes(
|
|
|
|
"Cannot read properties of undefined (reading '0')"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
throw new Error("Voyage AI failed to embed: Rate limit reached");
|
2024-05-19 20:20:23 +02:00
|
|
|
throw error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Public surface of this module: the Voyage AI embedding provider class.
module.exports = { VoyageAiEmbedder };
|