mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-10 17:00:11 +01:00
[FEAT] Add support for Voyage AI embedder (#1401)
* add support for voyageai embedder * remove unneeded import * linting * Add ENV examples * Update how chunks are processed for Voyage * use correct langchain import * Add data handling --------- Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
parent
ecd5d3cb8a
commit
5bf4b4db58
@ -124,6 +124,10 @@ GID='1000'
|
||||
# COHERE_API_KEY=
|
||||
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
|
||||
|
||||
# EMBEDDING_ENGINE='voyageai'
|
||||
# VOYAGEAI_API_KEY=
|
||||
# EMBEDDING_MODEL_PREF='voyage-large-2-instruct'
|
||||
|
||||
###########################################
|
||||
######## Vector Database Selection ########
|
||||
###########################################
|
||||
|
@ -0,0 +1,50 @@
|
||||
export default function VoyageAiOptions({ settings }) {
|
||||
return (
|
||||
<div className="w-full flex flex-col gap-y-4">
|
||||
<div className="w-full flex items-center gap-4">
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-4">
|
||||
API Key
|
||||
</label>
|
||||
<input
|
||||
type="password"
|
||||
name="VoyageAiApiKey"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||
placeholder="Voyage AI API Key"
|
||||
defaultValue={settings?.VoyageAiApiKey ? "*".repeat(20) : ""}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-4">
|
||||
Model Preference
|
||||
</label>
|
||||
<select
|
||||
name="EmbeddingModelPref"
|
||||
required={true}
|
||||
defaultValue={settings?.EmbeddingModelPref}
|
||||
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||
>
|
||||
<optgroup label="Available embedding models">
|
||||
{[
|
||||
"voyage-large-2-instruct",
|
||||
"voyage-law-2",
|
||||
"voyage-code-2",
|
||||
"voyage-large-2",
|
||||
"voyage-2",
|
||||
].map((model) => {
|
||||
return (
|
||||
<option key={model} value={model}>
|
||||
{model}
|
||||
</option>
|
||||
);
|
||||
})}
|
||||
</optgroup>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
BIN
frontend/src/media/embeddingprovider/voyageai.png
Normal file
BIN
frontend/src/media/embeddingprovider/voyageai.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png";
|
||||
import OllamaLogo from "@/media/llmprovider/ollama.png";
|
||||
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
|
||||
import CohereLogo from "@/media/llmprovider/cohere.png";
|
||||
import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
|
||||
|
||||
import PreLoader from "@/components/Preloader";
|
||||
import ChangeWarningModal from "@/components/ChangeWarning";
|
||||
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
|
||||
@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd
|
||||
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
|
||||
import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions";
|
||||
import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions";
|
||||
import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions";
|
||||
|
||||
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
|
||||
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||
@ -78,6 +81,13 @@ const EMBEDDERS = [
|
||||
options: (settings) => <CohereEmbeddingOptions settings={settings} />,
|
||||
description: "Run powerful embedding models from Cohere.",
|
||||
},
|
||||
{
|
||||
name: "Voyage AI",
|
||||
value: "voyageai",
|
||||
logo: VoyageAiLogo,
|
||||
options: (settings) => <VoyageAiOptions settings={settings} />,
|
||||
description: "Run powerful embedding models from Voyage AI.",
|
||||
},
|
||||
];
|
||||
|
||||
export default function GeneralEmbeddingPreference() {
|
||||
|
@ -28,6 +28,8 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png";
|
||||
import WeaviateLogo from "@/media/vectordbs/weaviate.png";
|
||||
import QDrantLogo from "@/media/vectordbs/qdrant.png";
|
||||
import MilvusLogo from "@/media/vectordbs/milvus.png";
|
||||
import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
|
||||
|
||||
import React, { useState, useEffect } from "react";
|
||||
import paths from "@/utils/paths";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = {
|
||||
],
|
||||
logo: CohereLogo,
|
||||
},
|
||||
voyageai: {
|
||||
name: "Voyage AI",
|
||||
description: [
|
||||
"Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.",
|
||||
],
|
||||
logo: VoyageAiLogo,
|
||||
},
|
||||
};
|
||||
|
||||
export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
|
||||
|
@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
|
||||
# COHERE_API_KEY=
|
||||
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
|
||||
|
||||
# EMBEDDING_ENGINE='voyageai'
|
||||
# VOYAGEAI_API_KEY=
|
||||
# EMBEDDING_MODEL_PREF='voyage-large-2-instruct'
|
||||
|
||||
###########################################
|
||||
######## Vector Database Selection ########
|
||||
###########################################
|
||||
|
@ -506,7 +506,10 @@ function apiWorkspaceEndpoints(app) {
|
||||
if (!document) return response.sendStatus(404).end();
|
||||
|
||||
await Document.update(document.id, { pinned: pinStatus });
|
||||
return response.status(200).json({ message: 'Pin status updated successfully' }).end();
|
||||
return response
|
||||
.status(200)
|
||||
.json({ message: "Pin status updated successfully" })
|
||||
.end();
|
||||
} catch (error) {
|
||||
console.error("Error processing the pin status update:", error);
|
||||
return response.status(500).end();
|
||||
|
@ -426,6 +426,9 @@ const SystemSettings = {
|
||||
// Cohere API Keys
|
||||
CohereApiKey: !!process.env.COHERE_API_KEY,
|
||||
CohereModelPref: process.env.COHERE_MODEL_PREF,
|
||||
|
||||
// VoyageAi API Keys
|
||||
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
|
||||
};
|
||||
},
|
||||
|
||||
|
@ -1999,7 +1999,8 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
},"/v1/workspace/{slug}/update-pin": {
|
||||
},
|
||||
"/workspace/{slug}/update-pin": {
|
||||
"post": {
|
||||
"tags": [
|
||||
"Workspaces"
|
||||
@ -2037,6 +2038,9 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden"
|
||||
},
|
||||
"404": {
|
||||
"description": "Document not found"
|
||||
},
|
||||
@ -2047,20 +2051,12 @@
|
||||
"requestBody": {
|
||||
"description": "JSON object with the document path and pin status to update.",
|
||||
"required": true,
|
||||
"type": "object",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"docPath": {
|
||||
"type": "string",
|
||||
"example": "custom-documents/my-pdf.pdf-hash.json"
|
||||
},
|
||||
"pinStatus": {
|
||||
"type": "boolean",
|
||||
"example": true
|
||||
}
|
||||
}
|
||||
"example": {
|
||||
"docPath": "custom-documents/my-pdf.pdf-hash.json",
|
||||
"pinStatus": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
45
server/utils/EmbeddingEngines/voyageAi/index.js
Normal file
45
server/utils/EmbeddingEngines/voyageAi/index.js
Normal file
@ -0,0 +1,45 @@
|
||||
class VoyageAiEmbedder {
|
||||
constructor() {
|
||||
if (!process.env.VOYAGEAI_API_KEY)
|
||||
throw new Error("No Voyage AI API key was set.");
|
||||
|
||||
const {
|
||||
VoyageEmbeddings,
|
||||
} = require("@langchain/community/embeddings/voyage");
|
||||
const voyage = new VoyageEmbeddings({
|
||||
apiKey: process.env.VOYAGEAI_API_KEY,
|
||||
});
|
||||
|
||||
this.voyage = voyage;
|
||||
this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct";
|
||||
|
||||
// Limit of how many strings we can process in a single pass to stay with resource or network limits
|
||||
this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches
|
||||
this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding
|
||||
}
|
||||
|
||||
async embedTextInput(textInput) {
|
||||
const result = await this.voyage.embedDocuments(
|
||||
Array.isArray(textInput) ? textInput : [textInput],
|
||||
{ modelName: this.model }
|
||||
);
|
||||
return result || [];
|
||||
}
|
||||
|
||||
async embedChunks(textChunks = []) {
|
||||
try {
|
||||
const embeddings = await this.voyage.embedDocuments(textChunks, {
|
||||
modelName: this.model,
|
||||
batchSize: this.batchSize,
|
||||
});
|
||||
return embeddings;
|
||||
} catch (error) {
|
||||
console.error("Voyage AI Failed to embed:", error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
VoyageAiEmbedder,
|
||||
};
|
@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() {
|
||||
case "cohere":
|
||||
const { CohereEmbedder } = require("../EmbeddingEngines/cohere");
|
||||
return new CohereEmbedder();
|
||||
case "voyageai":
|
||||
const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi");
|
||||
return new VoyageAiEmbedder();
|
||||
default:
|
||||
return new NativeEmbedder();
|
||||
}
|
||||
|
@ -350,6 +350,12 @@ const KEY_MAPPING = {
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
|
||||
// VoyageAi Options
|
||||
VoyageAiApiKey: {
|
||||
envKey: "VOYAGEAI_API_KEY",
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
|
||||
// Whisper (transcription) providers
|
||||
WhisperProvider: {
|
||||
envKey: "WHISPER_PROVIDER",
|
||||
@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") {
|
||||
"ollama",
|
||||
"lmstudio",
|
||||
"cohere",
|
||||
"voyageai",
|
||||
];
|
||||
return supported.includes(input)
|
||||
? null
|
||||
|
Loading…
Reference in New Issue
Block a user