diff --git a/docker/.env.example b/docker/.env.example index 7fedf944..23789af4 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -124,6 +124,10 @@ GID='1000' # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx new file mode 100644 index 00000000..33ce693d --- /dev/null +++ b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx @@ -0,0 +1,50 @@ +export default function VoyageAiOptions({ settings }) { + return ( +
+
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/embeddingprovider/voyageai.png b/frontend/src/media/embeddingprovider/voyageai.png new file mode 100644 index 00000000..4fd57eaa Binary files /dev/null and b/frontend/src/media/embeddingprovider/voyageai.png differ diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx index 8f234b5a..5a0f51c1 100644 --- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx @@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions"; @@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions"; import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions"; import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions"; +import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions"; import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -78,6 +81,13 @@ const EMBEDDERS = [ options: (settings) => , description: "Run powerful embedding models from Cohere.", }, + { + name: "Voyage AI", + value: "voyageai", + logo: VoyageAiLogo, + options: (settings) => , + description: "Run powerful embedding models from Voyage AI.", + }, ]; export default function GeneralEmbeddingPreference() { diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index b6ae8cb2..35358636 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -28,6 +28,8 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; import { useNavigate } from "react-router-dom"; @@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = { ], logo: CohereLogo, }, + voyageai: { + name: "Voyage AI", + description: [ + "Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.", + ], + logo: VoyageAiLogo, + }, }; export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) { diff --git a/server/.env.example b/server/.env.example index 4be9ab75..e38250be 100644 --- a/server/.env.example +++ b/server/.env.example @@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 7cd2dd47..cbbf1f23 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -498,15 +498,18 @@ function apiWorkspaceEndpoints(app) { const { slug = null } = request.params; const { docPath, pinStatus = false } = reqBody(request); const workspace = await Workspace.get({ slug }); - + const document = await Document.get({ workspaceId: workspace.id, docpath: docPath, }); if (!document) return response.sendStatus(404).end(); - + await Document.update(document.id, { pinned: pinStatus }); - return response.status(200).json({ message: 'Pin status updated successfully' }).end(); + return response + .status(200) + .json({ message: "Pin status updated successfully" }) + .end(); } catch (error) { console.error("Error processing the pin status update:", error); return response.status(500).end(); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index c8e239f1..a5bb6a23 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -426,6 +426,9 @@ const SystemSettings = { // Cohere API Keys CohereApiKey: !!process.env.COHERE_API_KEY, CohereModelPref: process.env.COHERE_MODEL_PREF, + + // VoyageAi API Keys + VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY, }; }, diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index b98891c9..8616943c 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1999,7 +1999,8 @@ } } } - },"/v1/workspace/{slug}/update-pin": { + }, + "/workspace/{slug}/update-pin": { "post": { "tags": [ "Workspaces" @@ -2037,6 +2038,9 @@ } } }, + "403": { + "description": "Forbidden" + }, "404": { "description": "Document not found" }, @@ -2047,20 +2051,12 @@ "requestBody": { "description": "JSON object with the document path and pin status to update.", "required": true, + "type": "object", "content": { "application/json": { - "schema": { - "type": "object", - "properties": { - "docPath": { - "type": "string", - "example": "custom-documents/my-pdf.pdf-hash.json" - }, - "pinStatus": { - "type": "boolean", - "example": true - } - } + "example": { + "docPath": "custom-documents/my-pdf.pdf-hash.json", + "pinStatus": true } } } diff --git a/server/utils/EmbeddingEngines/voyageAi/index.js b/server/utils/EmbeddingEngines/voyageAi/index.js new file mode 100644 index 00000000..b25d3208 --- /dev/null +++ b/server/utils/EmbeddingEngines/voyageAi/index.js @@ -0,0 +1,45 @@ +class VoyageAiEmbedder { + constructor() { + if (!process.env.VOYAGEAI_API_KEY) + throw new Error("No Voyage AI API key was set."); + + const { + VoyageEmbeddings, + } = require("@langchain/community/embeddings/voyage"); + const voyage = new VoyageEmbeddings({ + apiKey: process.env.VOYAGEAI_API_KEY, + }); + + this.voyage = voyage; + this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct"; + + // Limit of how many strings we can process in a single pass to stay with resource or network limits + this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches + this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding + } + + async embedTextInput(textInput) { + const result = await this.voyage.embedDocuments( + Array.isArray(textInput) ? textInput : [textInput], + { modelName: this.model } + ); + return result || []; + } + + async embedChunks(textChunks = []) { + try { + const embeddings = await this.voyage.embedDocuments(textChunks, { + modelName: this.model, + batchSize: this.batchSize, + }); + return embeddings; + } catch (error) { + console.error("Voyage AI Failed to embed:", error); + throw error; + } + } +} + +module.exports = { + VoyageAiEmbedder, +}; diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index d9a1ba09..e60202a6 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() { case "cohere": const { CohereEmbedder } = require("../EmbeddingEngines/cohere"); return new CohereEmbedder(); + case "voyageai": + const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi"); + return new VoyageAiEmbedder(); default: return new NativeEmbedder(); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 48c98e95..40154163 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -350,6 +350,12 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // VoyageAi Options + VoyageAiApiKey: { + envKey: "VOYAGEAI_API_KEY", + checks: [isNotEmpty], + }, + // Whisper (transcription) providers WhisperProvider: { envKey: "WHISPER_PROVIDER", @@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") { "ollama", "lmstudio", "cohere", + "voyageai", ]; return supported.includes(input) ? null