[FEAT] Add support for Voyage AI embedder (#1401)

* add support for voyageai embedder

* remove unneeded import

* linting

* Add ENV examples
Update how chunks are processed for Voyage
use correct langchain import
Add data handling

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2024-05-19 11:20:23 -07:00 committed by GitHub
parent ecd5d3cb8a
commit 5bf4b4db58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 150 additions and 16 deletions

View File

@ -124,6 +124,10 @@ GID='1000'
# COHERE_API_KEY=
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
# EMBEDDING_ENGINE='voyageai'
# VOYAGEAI_API_KEY=
# EMBEDDING_MODEL_PREF='voyage-large-2-instruct'
###########################################
######## Vector Database Selection ########
###########################################

View File

@ -0,0 +1,50 @@
export default function VoyageAiOptions({ settings }) {
return (
<div className="w-full flex flex-col gap-y-4">
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="VoyageAiApiKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Voyage AI API Key"
defaultValue={settings?.VoyageAiApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Preference
</label>
<select
name="EmbeddingModelPref"
required={true}
defaultValue={settings?.EmbeddingModelPref}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<optgroup label="Available embedding models">
{[
"voyage-large-2-instruct",
"voyage-law-2",
"voyage-code-2",
"voyage-large-2",
"voyage-2",
].map((model) => {
return (
<option key={model} value={model}>
{model}
</option>
);
})}
</optgroup>
</select>
</div>
</div>
</div>
);
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png";
import OllamaLogo from "@/media/llmprovider/ollama.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
import PreLoader from "@/components/Preloader";
import ChangeWarningModal from "@/components/ChangeWarning";
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions";
import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions";
import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions";
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@ -78,6 +81,13 @@ const EMBEDDERS = [
options: (settings) => <CohereEmbeddingOptions settings={settings} />,
description: "Run powerful embedding models from Cohere.",
},
{
name: "Voyage AI",
value: "voyageai",
logo: VoyageAiLogo,
options: (settings) => <VoyageAiOptions settings={settings} />,
description: "Run powerful embedding models from Voyage AI.",
},
];
export default function GeneralEmbeddingPreference() {

View File

@ -28,6 +28,8 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png";
import WeaviateLogo from "@/media/vectordbs/weaviate.png";
import QDrantLogo from "@/media/vectordbs/qdrant.png";
import MilvusLogo from "@/media/vectordbs/milvus.png";
import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
import React, { useState, useEffect } from "react";
import paths from "@/utils/paths";
import { useNavigate } from "react-router-dom";
@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = {
],
logo: CohereLogo,
},
voyageai: {
name: "Voyage AI",
description: [
"Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.",
],
logo: VoyageAiLogo,
},
};
export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {

View File

@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# COHERE_API_KEY=
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
# EMBEDDING_ENGINE='voyageai'
# VOYAGEAI_API_KEY=
# EMBEDDING_MODEL_PREF='voyage-large-2-instruct'
###########################################
######## Vector Database Selection ########
###########################################

View File

@ -498,15 +498,18 @@ function apiWorkspaceEndpoints(app) {
const { slug = null } = request.params;
const { docPath, pinStatus = false } = reqBody(request);
const workspace = await Workspace.get({ slug });
const document = await Document.get({
workspaceId: workspace.id,
docpath: docPath,
});
if (!document) return response.sendStatus(404).end();
await Document.update(document.id, { pinned: pinStatus });
return response.status(200).json({ message: 'Pin status updated successfully' }).end();
return response
.status(200)
.json({ message: "Pin status updated successfully" })
.end();
} catch (error) {
console.error("Error processing the pin status update:", error);
return response.status(500).end();

View File

@ -426,6 +426,9 @@ const SystemSettings = {
// Cohere API Keys
CohereApiKey: !!process.env.COHERE_API_KEY,
CohereModelPref: process.env.COHERE_MODEL_PREF,
// VoyageAi API Keys
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
};
},

View File

@ -1999,7 +1999,8 @@
}
}
}
},"/v1/workspace/{slug}/update-pin": {
},
"/workspace/{slug}/update-pin": {
"post": {
"tags": [
"Workspaces"
@ -2037,6 +2038,9 @@
}
}
},
"403": {
"description": "Forbidden"
},
"404": {
"description": "Document not found"
},
@ -2047,20 +2051,12 @@
"requestBody": {
"description": "JSON object with the document path and pin status to update.",
"required": true,
"type": "object",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"docPath": {
"type": "string",
"example": "custom-documents/my-pdf.pdf-hash.json"
},
"pinStatus": {
"type": "boolean",
"example": true
}
}
"example": {
"docPath": "custom-documents/my-pdf.pdf-hash.json",
"pinStatus": true
}
}
}

View File

@ -0,0 +1,45 @@
class VoyageAiEmbedder {
constructor() {
if (!process.env.VOYAGEAI_API_KEY)
throw new Error("No Voyage AI API key was set.");
const {
VoyageEmbeddings,
} = require("@langchain/community/embeddings/voyage");
const voyage = new VoyageEmbeddings({
apiKey: process.env.VOYAGEAI_API_KEY,
});
this.voyage = voyage;
this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct";
// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches
this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding
}
async embedTextInput(textInput) {
const result = await this.voyage.embedDocuments(
Array.isArray(textInput) ? textInput : [textInput],
{ modelName: this.model }
);
return result || [];
}
async embedChunks(textChunks = []) {
try {
const embeddings = await this.voyage.embedDocuments(textChunks, {
modelName: this.model,
batchSize: this.batchSize,
});
return embeddings;
} catch (error) {
console.error("Voyage AI Failed to embed:", error);
throw error;
}
}
}
module.exports = {
VoyageAiEmbedder,
};

View File

@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() {
case "cohere":
const { CohereEmbedder } = require("../EmbeddingEngines/cohere");
return new CohereEmbedder();
case "voyageai":
const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi");
return new VoyageAiEmbedder();
default:
return new NativeEmbedder();
}

View File

@ -350,6 +350,12 @@ const KEY_MAPPING = {
checks: [isNotEmpty],
},
// VoyageAi Options
VoyageAiApiKey: {
envKey: "VOYAGEAI_API_KEY",
checks: [isNotEmpty],
},
// Whisper (transcription) providers
WhisperProvider: {
envKey: "WHISPER_PROVIDER",
@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") {
"ollama",
"lmstudio",
"cohere",
"voyageai",
];
return supported.includes(input)
? null