mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-11 01:10:11 +01:00
[FEAT] Add support for Voyage AI embedder (#1401)
* add support for voyageai embedder * remove unneeded import * linting * Add ENV examples Update how chunks are processed for Voyage use correct langchain import Add data handling --------- Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
parent
ecd5d3cb8a
commit
5bf4b4db58
@ -124,6 +124,10 @@ GID='1000'
|
|||||||
# COHERE_API_KEY=
|
# COHERE_API_KEY=
|
||||||
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
|
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
|
||||||
|
|
||||||
|
# EMBEDDING_ENGINE='voyageai'
|
||||||
|
# VOYAGEAI_API_KEY=
|
||||||
|
# EMBEDDING_MODEL_PREF='voyage-large-2-instruct'
|
||||||
|
|
||||||
###########################################
|
###########################################
|
||||||
######## Vector Database Selection ########
|
######## Vector Database Selection ########
|
||||||
###########################################
|
###########################################
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
export default function VoyageAiOptions({ settings }) {
|
||||||
|
return (
|
||||||
|
<div className="w-full flex flex-col gap-y-4">
|
||||||
|
<div className="w-full flex items-center gap-4">
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
API Key
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
name="VoyageAiApiKey"
|
||||||
|
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||||
|
placeholder="Voyage AI API Key"
|
||||||
|
defaultValue={settings?.VoyageAiApiKey ? "*".repeat(20) : ""}
|
||||||
|
required={true}
|
||||||
|
autoComplete="off"
|
||||||
|
spellCheck={false}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
Model Preference
|
||||||
|
</label>
|
||||||
|
<select
|
||||||
|
name="EmbeddingModelPref"
|
||||||
|
required={true}
|
||||||
|
defaultValue={settings?.EmbeddingModelPref}
|
||||||
|
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||||
|
>
|
||||||
|
<optgroup label="Available embedding models">
|
||||||
|
{[
|
||||||
|
"voyage-large-2-instruct",
|
||||||
|
"voyage-law-2",
|
||||||
|
"voyage-code-2",
|
||||||
|
"voyage-large-2",
|
||||||
|
"voyage-2",
|
||||||
|
].map((model) => {
|
||||||
|
return (
|
||||||
|
<option key={model} value={model}>
|
||||||
|
{model}
|
||||||
|
</option>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
BIN
frontend/src/media/embeddingprovider/voyageai.png
Normal file
BIN
frontend/src/media/embeddingprovider/voyageai.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png";
|
|||||||
import OllamaLogo from "@/media/llmprovider/ollama.png";
|
import OllamaLogo from "@/media/llmprovider/ollama.png";
|
||||||
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
|
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
|
||||||
import CohereLogo from "@/media/llmprovider/cohere.png";
|
import CohereLogo from "@/media/llmprovider/cohere.png";
|
||||||
|
import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
|
||||||
|
|
||||||
import PreLoader from "@/components/Preloader";
|
import PreLoader from "@/components/Preloader";
|
||||||
import ChangeWarningModal from "@/components/ChangeWarning";
|
import ChangeWarningModal from "@/components/ChangeWarning";
|
||||||
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
|
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
|
||||||
@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd
|
|||||||
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
|
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
|
||||||
import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions";
|
import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions";
|
||||||
import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions";
|
import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions";
|
||||||
|
import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions";
|
||||||
|
|
||||||
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
|
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
|
||||||
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||||
@ -78,6 +81,13 @@ const EMBEDDERS = [
|
|||||||
options: (settings) => <CohereEmbeddingOptions settings={settings} />,
|
options: (settings) => <CohereEmbeddingOptions settings={settings} />,
|
||||||
description: "Run powerful embedding models from Cohere.",
|
description: "Run powerful embedding models from Cohere.",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Voyage AI",
|
||||||
|
value: "voyageai",
|
||||||
|
logo: VoyageAiLogo,
|
||||||
|
options: (settings) => <VoyageAiOptions settings={settings} />,
|
||||||
|
description: "Run powerful embedding models from Voyage AI.",
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
export default function GeneralEmbeddingPreference() {
|
export default function GeneralEmbeddingPreference() {
|
||||||
|
@ -28,6 +28,8 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png";
|
|||||||
import WeaviateLogo from "@/media/vectordbs/weaviate.png";
|
import WeaviateLogo from "@/media/vectordbs/weaviate.png";
|
||||||
import QDrantLogo from "@/media/vectordbs/qdrant.png";
|
import QDrantLogo from "@/media/vectordbs/qdrant.png";
|
||||||
import MilvusLogo from "@/media/vectordbs/milvus.png";
|
import MilvusLogo from "@/media/vectordbs/milvus.png";
|
||||||
|
import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
|
||||||
|
|
||||||
import React, { useState, useEffect } from "react";
|
import React, { useState, useEffect } from "react";
|
||||||
import paths from "@/utils/paths";
|
import paths from "@/utils/paths";
|
||||||
import { useNavigate } from "react-router-dom";
|
import { useNavigate } from "react-router-dom";
|
||||||
@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = {
|
|||||||
],
|
],
|
||||||
logo: CohereLogo,
|
logo: CohereLogo,
|
||||||
},
|
},
|
||||||
|
voyageai: {
|
||||||
|
name: "Voyage AI",
|
||||||
|
description: [
|
||||||
|
"Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.",
|
||||||
|
],
|
||||||
|
logo: VoyageAiLogo,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
|
export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
|
||||||
|
@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
|
|||||||
# COHERE_API_KEY=
|
# COHERE_API_KEY=
|
||||||
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
|
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
|
||||||
|
|
||||||
|
# EMBEDDING_ENGINE='voyageai'
|
||||||
|
# VOYAGEAI_API_KEY=
|
||||||
|
# EMBEDDING_MODEL_PREF='voyage-large-2-instruct'
|
||||||
|
|
||||||
###########################################
|
###########################################
|
||||||
######## Vector Database Selection ########
|
######## Vector Database Selection ########
|
||||||
###########################################
|
###########################################
|
||||||
|
@ -506,7 +506,10 @@ function apiWorkspaceEndpoints(app) {
|
|||||||
if (!document) return response.sendStatus(404).end();
|
if (!document) return response.sendStatus(404).end();
|
||||||
|
|
||||||
await Document.update(document.id, { pinned: pinStatus });
|
await Document.update(document.id, { pinned: pinStatus });
|
||||||
return response.status(200).json({ message: 'Pin status updated successfully' }).end();
|
return response
|
||||||
|
.status(200)
|
||||||
|
.json({ message: "Pin status updated successfully" })
|
||||||
|
.end();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error processing the pin status update:", error);
|
console.error("Error processing the pin status update:", error);
|
||||||
return response.status(500).end();
|
return response.status(500).end();
|
||||||
|
@ -426,6 +426,9 @@ const SystemSettings = {
|
|||||||
// Cohere API Keys
|
// Cohere API Keys
|
||||||
CohereApiKey: !!process.env.COHERE_API_KEY,
|
CohereApiKey: !!process.env.COHERE_API_KEY,
|
||||||
CohereModelPref: process.env.COHERE_MODEL_PREF,
|
CohereModelPref: process.env.COHERE_MODEL_PREF,
|
||||||
|
|
||||||
|
// VoyageAi API Keys
|
||||||
|
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -1999,7 +1999,8 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},"/v1/workspace/{slug}/update-pin": {
|
},
|
||||||
|
"/workspace/{slug}/update-pin": {
|
||||||
"post": {
|
"post": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"Workspaces"
|
"Workspaces"
|
||||||
@ -2037,6 +2038,9 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden"
|
||||||
|
},
|
||||||
"404": {
|
"404": {
|
||||||
"description": "Document not found"
|
"description": "Document not found"
|
||||||
},
|
},
|
||||||
@ -2047,20 +2051,12 @@
|
|||||||
"requestBody": {
|
"requestBody": {
|
||||||
"description": "JSON object with the document path and pin status to update.",
|
"description": "JSON object with the document path and pin status to update.",
|
||||||
"required": true,
|
"required": true,
|
||||||
|
"type": "object",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"example": {
|
||||||
"type": "object",
|
"docPath": "custom-documents/my-pdf.pdf-hash.json",
|
||||||
"properties": {
|
"pinStatus": true
|
||||||
"docPath": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "custom-documents/my-pdf.pdf-hash.json"
|
|
||||||
},
|
|
||||||
"pinStatus": {
|
|
||||||
"type": "boolean",
|
|
||||||
"example": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
45
server/utils/EmbeddingEngines/voyageAi/index.js
Normal file
45
server/utils/EmbeddingEngines/voyageAi/index.js
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
class VoyageAiEmbedder {
|
||||||
|
constructor() {
|
||||||
|
if (!process.env.VOYAGEAI_API_KEY)
|
||||||
|
throw new Error("No Voyage AI API key was set.");
|
||||||
|
|
||||||
|
const {
|
||||||
|
VoyageEmbeddings,
|
||||||
|
} = require("@langchain/community/embeddings/voyage");
|
||||||
|
const voyage = new VoyageEmbeddings({
|
||||||
|
apiKey: process.env.VOYAGEAI_API_KEY,
|
||||||
|
});
|
||||||
|
|
||||||
|
this.voyage = voyage;
|
||||||
|
this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct";
|
||||||
|
|
||||||
|
// Limit of how many strings we can process in a single pass to stay with resource or network limits
|
||||||
|
this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches
|
||||||
|
this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding
|
||||||
|
}
|
||||||
|
|
||||||
|
async embedTextInput(textInput) {
|
||||||
|
const result = await this.voyage.embedDocuments(
|
||||||
|
Array.isArray(textInput) ? textInput : [textInput],
|
||||||
|
{ modelName: this.model }
|
||||||
|
);
|
||||||
|
return result || [];
|
||||||
|
}
|
||||||
|
|
||||||
|
async embedChunks(textChunks = []) {
|
||||||
|
try {
|
||||||
|
const embeddings = await this.voyage.embedDocuments(textChunks, {
|
||||||
|
modelName: this.model,
|
||||||
|
batchSize: this.batchSize,
|
||||||
|
});
|
||||||
|
return embeddings;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Voyage AI Failed to embed:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
VoyageAiEmbedder,
|
||||||
|
};
|
@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() {
|
|||||||
case "cohere":
|
case "cohere":
|
||||||
const { CohereEmbedder } = require("../EmbeddingEngines/cohere");
|
const { CohereEmbedder } = require("../EmbeddingEngines/cohere");
|
||||||
return new CohereEmbedder();
|
return new CohereEmbedder();
|
||||||
|
case "voyageai":
|
||||||
|
const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi");
|
||||||
|
return new VoyageAiEmbedder();
|
||||||
default:
|
default:
|
||||||
return new NativeEmbedder();
|
return new NativeEmbedder();
|
||||||
}
|
}
|
||||||
|
@ -350,6 +350,12 @@ const KEY_MAPPING = {
|
|||||||
checks: [isNotEmpty],
|
checks: [isNotEmpty],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// VoyageAi Options
|
||||||
|
VoyageAiApiKey: {
|
||||||
|
envKey: "VOYAGEAI_API_KEY",
|
||||||
|
checks: [isNotEmpty],
|
||||||
|
},
|
||||||
|
|
||||||
// Whisper (transcription) providers
|
// Whisper (transcription) providers
|
||||||
WhisperProvider: {
|
WhisperProvider: {
|
||||||
envKey: "WHISPER_PROVIDER",
|
envKey: "WHISPER_PROVIDER",
|
||||||
@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") {
|
|||||||
"ollama",
|
"ollama",
|
||||||
"lmstudio",
|
"lmstudio",
|
||||||
"cohere",
|
"cohere",
|
||||||
|
"voyageai",
|
||||||
];
|
];
|
||||||
return supported.includes(input)
|
return supported.includes(input)
|
||||||
? null
|
? null
|
||||||
|
Loading…
Reference in New Issue
Block a user