From f1401395342488566820cbf189b8eeeb8c53da7f Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Fri, 17 May 2024 14:03:25 -0700 Subject: [PATCH 1/9] Patch WSS upgrade for manual HTTPS certs (#1429) * Patch WSS upgrade for manual HTTPS certs * update comment * refactor --- server/.gitignore | 5 ++++- server/index.js | 15 +++++++++------ server/utils/boot/index.js | 11 +++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/server/.gitignore b/server/.gitignore index b22a054f..adcf7aa4 100644 --- a/server/.gitignore +++ b/server/.gitignore @@ -18,4 +18,7 @@ public/ # For legacy copies of repo documents vector-cache -yarn-error.log \ No newline at end of file +yarn-error.log + +# Local SSL Certs for HTTPS +sslcert \ No newline at end of file diff --git a/server/index.js b/server/index.js index 7874045b..59d8fec6 100644 --- a/server/index.js +++ b/server/index.js @@ -36,7 +36,12 @@ app.use( }) ); -require("express-ws")(app); +if (!!process.env.ENABLE_HTTPS) { + bootSSL(app, process.env.SERVER_PORT || 3001); +} else { + require("express-ws")(app); // load WebSockets in non-SSL mode. +} + app.use("/api", apiRouter); systemEndpoints(apiRouter); extensionEndpoints(apiRouter); @@ -109,8 +114,6 @@ app.all("*", function (_, response) { response.sendStatus(404); }); -if (!!process.env.ENABLE_HTTPS) { - bootSSL(app, process.env.SERVER_PORT || 3001); -} else { - bootHTTP(app, process.env.SERVER_PORT || 3001); -} +// In non-https mode we need to boot at the end since the server has not yet +// started and is `.listen`ing. +if (!process.env.ENABLE_HTTPS) bootHTTP(app, process.env.SERVER_PORT || 3001); diff --git a/server/utils/boot/index.js b/server/utils/boot/index.js index ea95e1f5..2022f66e 100644 --- a/server/utils/boot/index.js +++ b/server/utils/boot/index.js @@ -12,16 +12,18 @@ function bootSSL(app, port = 3001) { const privateKey = fs.readFileSync(process.env.HTTPS_KEY_PATH); const certificate = fs.readFileSync(process.env.HTTPS_CERT_PATH); const credentials = { key: privateKey, cert: certificate }; + const server = https.createServer(credentials, app); - https - .createServer(credentials, app) + server .listen(port, async () => { await setupTelemetry(); new CommunicationKey(true); console.log(`Primary server in HTTPS mode listening on port ${port}`); }) .on("error", catchSigTerms); - return app; + + require("express-ws")(app, server); // Apply same certificate + server for WSS connections + return { app, server }; } catch (e) { console.error( `\x1b[31m[SSL BOOT FAILED]\x1b[0m ${e.message} - falling back to HTTP boot.`, @@ -46,7 +48,8 @@ function bootHTTP(app, port = 3001) { console.log(`Primary server in HTTP mode listening on port ${port}`); }) .on("error", catchSigTerms); - return app; + + return { app, server: null }; } function catchSigTerms() { From 3794ef8dfd7f183e072b19b4f5a8442bdce2708e Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Fri, 17 May 2024 17:33:19 -0700 Subject: [PATCH 2/9] patch: use workspace LLM for embed chats and fallback to system resolves #1439 --- server/utils/chats/embed.js | 1 + 1 file changed, 1 insertion(+) diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js index 98b096fb..8488aedd 100644 --- a/server/utils/chats/embed.js +++ b/server/utils/chats/embed.js @@ -29,6 +29,7 @@ async function streamChatWithForEmbed( const uuid = uuidv4(); const LLMConnector = getLLMProvider({ + provider: embed?.workspace?.chatProvider, model: chatModel ?? 
embed.workspace?.chatModel, }); const VectorDb = getVectorDbClass(); From 1a5aacb001259fa98fbb97548d4ce0c8410cd470 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Fri, 17 May 2024 21:31:29 -0700 Subject: [PATCH 3/9] Support multi-model whispers (#1444) --- .../utils/WhisperProviders/localWhisper.js | 16 ++-- .../NativeTranscriptionOptions/index.jsx | 93 ++++++++++++++----- .../TranscriptionPreference/index.jsx | 37 ++++---- server/models/systemSettings.js | 2 + server/utils/collectorApi/index.js | 1 + server/utils/helpers/updateENV.js | 15 +++ 6 files changed, 118 insertions(+), 46 deletions(-) diff --git a/collector/utils/WhisperProviders/localWhisper.js b/collector/utils/WhisperProviders/localWhisper.js index 46dbe226..af13c8a9 100644 --- a/collector/utils/WhisperProviders/localWhisper.js +++ b/collector/utils/WhisperProviders/localWhisper.js @@ -1,19 +1,23 @@ const fs = require("fs"); const path = require("path"); const { v4 } = require("uuid"); +const defaultWhisper = "Xenova/whisper-small"; // Model Card: https://huggingface.co/Xenova/whisper-small +const fileSize = { + "Xenova/whisper-small": "250mb", + "Xenova/whisper-large": "1.56GB", +}; class LocalWhisper { - constructor() { - // Model Card: https://huggingface.co/Xenova/whisper-small - this.model = "Xenova/whisper-small"; + constructor({ options }) { + this.model = options?.WhisperModelPref ?? defaultWhisper; + this.fileSize = fileSize[this.model]; this.cacheDir = path.resolve( process.env.STORAGE_DIR ? path.resolve(process.env.STORAGE_DIR, `models`) : path.resolve(__dirname, `../../../server/storage/models`) ); - this.modelPath = path.resolve(this.cacheDir, "Xenova", "whisper-small"); - + this.modelPath = path.resolve(this.cacheDir, ...this.model.split("/")); // Make directory when it does not exist in existing installations if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir, { recursive: true }); @@ -104,7 +108,7 @@ class LocalWhisper { async client() { if (!fs.existsSync(this.modelPath)) { this.#log( - `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)` + `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~${this.fileSize})` ); } diff --git a/frontend/src/components/TranscriptionSelection/NativeTranscriptionOptions/index.jsx b/frontend/src/components/TranscriptionSelection/NativeTranscriptionOptions/index.jsx index 07ee1212..d2e81a68 100644 --- a/frontend/src/components/TranscriptionSelection/NativeTranscriptionOptions/index.jsx +++ b/frontend/src/components/TranscriptionSelection/NativeTranscriptionOptions/index.jsx @@ -1,38 +1,89 @@ import { Gauge } from "@phosphor-icons/react"; -export default function NativeTranscriptionOptions() { +import { useState } from "react"; + +export default function NativeTranscriptionOptions({ settings }) { + const [model, setModel] = useState(settings?.WhisperModelPref); + return (
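+      {/* LocalWarning (defined below) swaps the RAM and file-size guidance shown here
+          to match whichever whisper model is currently selected. */}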
-
-
- -

- Using the local whisper model on machines with limited RAM or CPU - can stall AnythingLLM when processing media files. -
- We recommend at least 2GB of RAM and upload files <10Mb. -
-
- - The built-in model will automatically download on the first use. - -

-
-
+
); } + +function LocalWarning({ model }) { + switch (model) { + case "Xenova/whisper-small": + return ; + case "Xenova/whisper-large": + return ; + default: + return ; + } +} + +function WhisperSmall() { + return ( +
+
+ +

+ Running the whisper-small model on a machine with limited RAM + or CPU can stall AnythingLLM when processing media files. +
+ We recommend at least 2GB of RAM and upload files <10Mb. +
+
+ + This model will automatically download on the first use. (250mb) + +

+
+
+ ); +} + +function WhisperLarge() { + return ( +
+
+ +

+ Using the whisper-large model on machines with limited RAM or + CPU can stall AnythingLLM when processing media files. This model is + substantially larger than the whisper-small. +
+ We recommend at least 8GB of RAM and upload files <10Mb. +
+
+ + This model will automatically download on the first use. (1.56GB) + +

+
+
+ ); +} diff --git a/frontend/src/pages/GeneralSettings/TranscriptionPreference/index.jsx b/frontend/src/pages/GeneralSettings/TranscriptionPreference/index.jsx index 5fbd196c..07907af7 100644 --- a/frontend/src/pages/GeneralSettings/TranscriptionPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/TranscriptionPreference/index.jsx @@ -12,6 +12,23 @@ import LLMItem from "@/components/LLMSelection/LLMItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; import CTAButton from "@/components/lib/CTAButton"; +const PROVIDERS = [ + { + name: "OpenAI", + value: "openai", + logo: OpenAiLogo, + options: (settings) => , + description: "Leverage the OpenAI Whisper-large model using your API key.", + }, + { + name: "AnythingLLM Built-In", + value: "local", + logo: AnythingLLMIcon, + options: (settings) => , + description: "Run a built-in whisper model on this instance privately.", + }, +]; + export default function TranscriptionModelPreference() { const [saving, setSaving] = useState(false); const [hasChanges, setHasChanges] = useState(false); @@ -68,24 +85,6 @@ export default function TranscriptionModelPreference() { fetchKeys(); }, []); - const PROVIDERS = [ - { - name: "OpenAI", - value: "openai", - logo: OpenAiLogo, - options: , - description: - "Leverage the OpenAI Whisper-large model using your API key.", - }, - { - name: "AnythingLLM Built-In", - value: "local", - logo: AnythingLLMIcon, - options: , - description: "Run a built-in whisper model on this instance privately.", - }, - ]; - useEffect(() => { const filtered = PROVIDERS.filter((provider) => provider.name.toLowerCase().includes(searchQuery.toLowerCase()) @@ -228,7 +227,7 @@ export default function TranscriptionModelPreference() { {selectedProvider && PROVIDERS.find( (provider) => provider.value === selectedProvider - )?.options} + )?.options(settings)} diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 68d1d0dd..c8e239f1 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -150,6 +150,8 @@ const SystemSettings = { // - then it can be shared. // -------------------------------------------------------- WhisperProvider: process.env.WHISPER_PROVIDER || "local", + WhisperModelPref: + process.env.WHISPER_MODEL_PREF || "Xenova/whisper-small", // -------------------------------------------------------- // TTS/STT Selection Settings & Configs diff --git a/server/utils/collectorApi/index.js b/server/utils/collectorApi/index.js index 1a1431ac..6971f640 100644 --- a/server/utils/collectorApi/index.js +++ b/server/utils/collectorApi/index.js @@ -17,6 +17,7 @@ class CollectorApi { #attachOptions() { return { whisperProvider: process.env.WHISPER_PROVIDER || "local", + WhisperModelPref: process.env.WHISPER_MODEL_PREF, openAiKey: process.env.OPEN_AI_KEY || null, }; } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 8630d85a..48c98e95 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -356,6 +356,11 @@ const KEY_MAPPING = { checks: [isNotEmpty, supportedTranscriptionProvider], postUpdate: [], }, + WhisperModelPref: { + envKey: "WHISPER_MODEL_PREF", + checks: [validLocalWhisper], + postUpdate: [], + }, // System Settings AuthToken: { @@ -468,6 +473,16 @@ function supportedTTSProvider(input = "") { return validSelection ? 
null : `${input} is not a valid TTS provider.`; } +function validLocalWhisper(input = "") { + const validSelection = [ + "Xenova/whisper-small", + "Xenova/whisper-large", + ].includes(input); + return validSelection + ? null + : `${input} is not a valid Whisper model selection.`; +} + function supportedLLM(input = "") { const validSelection = [ "openai", From 9ace0e67e68aa5dbe9c29c2fc66d981de18469f6 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Fri, 17 May 2024 21:44:55 -0700 Subject: [PATCH 4/9] Validate max_tokens is number (#1445) --- server/utils/AiProviders/genericOpenAi/index.js | 5 ++++- server/utils/agents/aibitat/plugins/summarize.js | 1 - server/utils/agents/aibitat/providers/genericOpenAi.js | 5 ++++- server/utils/http/index.js | 6 ++++++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/server/utils/AiProviders/genericOpenAi/index.js b/server/utils/AiProviders/genericOpenAi/index.js index dc0264e4..46b8aefb 100644 --- a/server/utils/AiProviders/genericOpenAi/index.js +++ b/server/utils/AiProviders/genericOpenAi/index.js @@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); +const { toValidNumber } = require("../../http"); class GenericOpenAiLLM { constructor(embedder = null, modelPreference = null) { @@ -18,7 +19,9 @@ class GenericOpenAiLLM { }); this.model = modelPreference ?? process.env.GENERIC_OPEN_AI_MODEL_PREF ?? null; - this.maxTokens = process.env.GENERIC_OPEN_AI_MAX_TOKENS ?? 1024; + this.maxTokens = process.env.GENERIC_OPEN_AI_MAX_TOKENS + ? toValidNumber(process.env.GENERIC_OPEN_AI_MAX_TOKENS, 1024) + : 1024; if (!this.model) throw new Error("GenericOpenAI must have a valid model set."); this.limits = { diff --git a/server/utils/agents/aibitat/plugins/summarize.js b/server/utils/agents/aibitat/plugins/summarize.js index 526de116..de1657c9 100644 --- a/server/utils/agents/aibitat/plugins/summarize.js +++ b/server/utils/agents/aibitat/plugins/summarize.js @@ -1,6 +1,5 @@ const { Document } = require("../../../../models/documents"); const { safeJsonParse } = require("../../../http"); -const { validate } = require("uuid"); const { summarizeContent } = require("../utils/summarize"); const Provider = require("../providers/ai-provider"); diff --git a/server/utils/agents/aibitat/providers/genericOpenAi.js b/server/utils/agents/aibitat/providers/genericOpenAi.js index a1b2db3e..9a753ca2 100644 --- a/server/utils/agents/aibitat/providers/genericOpenAi.js +++ b/server/utils/agents/aibitat/providers/genericOpenAi.js @@ -2,6 +2,7 @@ const OpenAI = require("openai"); const Provider = require("./ai-provider.js"); const InheritMultiple = require("./helpers/classes.js"); const UnTooled = require("./helpers/untooled.js"); +const { toValidNumber } = require("../../../http/index.js"); /** * The agent provider for the Generic OpenAI provider. @@ -24,7 +25,9 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) { this._client = client; this.model = model; this.verbose = true; - this.maxTokens = process.env.GENERIC_OPEN_AI_MAX_TOKENS ?? 1024; + this.maxTokens = process.env.GENERIC_OPEN_AI_MAX_TOKENS + ? 
toValidNumber(process.env.GENERIC_OPEN_AI_MAX_TOKENS, 1024) + : 1024; } get client() { diff --git a/server/utils/http/index.js b/server/utils/http/index.js index 6400c36b..e812b8ab 100644 --- a/server/utils/http/index.js +++ b/server/utils/http/index.js @@ -91,6 +91,11 @@ function isValidUrl(urlString = "") { return false; } +function toValidNumber(number = null, fallback = null) { + if (isNaN(Number(number))) return fallback; + return Number(number); +} + module.exports = { reqBody, multiUserMode, @@ -101,4 +106,5 @@ module.exports = { parseAuthHeader, safeJsonParse, isValidUrl, + toValidNumber, }; From 396e04b0cca008795719db6f5d7de1965848d935 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Sat, 18 May 2024 21:29:23 -0700 Subject: [PATCH 5/9] Icon Button Tooltips (#1451) - add tooltips to footer buttons to clear ambiguity on what they might mean - Updated Generic OpenAI connector to not have a ton of inputs in one row --- frontend/.gitignore | 2 - frontend/src/components/Footer/index.jsx | 88 ++++++---- .../GenericOpenAiOptions/index.jsx | 152 +++++++++--------- .../src/components/SettingsButton/index.jsx | 39 +++-- .../src/components/SettingsSidebar/index.jsx | 4 +- .../ChatContainer/PromptInput/index.jsx | 28 +++- 6 files changed, 183 insertions(+), 130 deletions(-) diff --git a/frontend/.gitignore b/frontend/.gitignore index 78720603..77e294d0 100644 --- a/frontend/.gitignore +++ b/frontend/.gitignore @@ -9,10 +9,8 @@ lerna-debug.log* node_modules dist -lib dist-ssr *.local -!frontend/components/lib # Editor directories and files .vscode/* diff --git a/frontend/src/components/Footer/index.jsx b/frontend/src/components/Footer/index.jsx index 10cd80cd..6e80f0df 100644 --- a/frontend/src/components/Footer/index.jsx +++ b/frontend/src/components/Footer/index.jsx @@ -14,6 +14,8 @@ import { import React, { useEffect, useState } from "react"; import SettingsButton from "../SettingsButton"; import { isMobile } from "react-device-detect"; +import { Tooltip } from "react-tooltip"; +import { v4 } from "uuid"; export const MAX_ICONS = 3; export const ICON_COMPONENTS = { @@ -47,36 +49,48 @@ export default function Footer() { return (
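+      {/* Each icon link below is wrapped in a ToolTipWrapper (exported at the bottom
+          of this file) so hovering explains what the button does. */}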
- - - - - - - - - + + + + + + + + + + + + + + + {!isMobile && }
@@ -105,3 +119,17 @@ export default function Footer() { ); } + +export function ToolTipWrapper({ id = v4(), children }) { + return ( +
+ {children} + +
+ ); +} diff --git a/frontend/src/components/LLMSelection/GenericOpenAiOptions/index.jsx b/frontend/src/components/LLMSelection/GenericOpenAiOptions/index.jsx index ac143e94..d1088063 100644 --- a/frontend/src/components/LLMSelection/GenericOpenAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/GenericOpenAiOptions/index.jsx @@ -1,80 +1,84 @@ export default function GenericOpenAiOptions({ settings }) { return ( -
-
- - +
+
+
+ + +
+
+ + +
+
+ + +
-
- - -
-
- - -
-
- - e.target.blur()} - defaultValue={settings?.GenericOpenAiTokenLimit} - required={true} - autoComplete="off" - /> -
-
- - +
+
+ + e.target.blur()} + defaultValue={settings?.GenericOpenAiTokenLimit} + required={true} + autoComplete="off" + /> +
+
+ + +
); diff --git a/frontend/src/components/SettingsButton/index.jsx b/frontend/src/components/SettingsButton/index.jsx index ac2d22cd..19a4a17a 100644 --- a/frontend/src/components/SettingsButton/index.jsx +++ b/frontend/src/components/SettingsButton/index.jsx @@ -3,6 +3,7 @@ import paths from "@/utils/paths"; import { ArrowUUpLeft, Wrench } from "@phosphor-icons/react"; import { Link } from "react-router-dom"; import { useMatch } from "react-router-dom"; +import { ToolTipWrapper } from "../Footer"; export default function SettingsButton() { const isInSettings = !!useMatch("/settings/*"); @@ -12,22 +13,32 @@ export default function SettingsButton() { if (isInSettings) return ( - - - + + + + + ); return ( - - - + + + + + ); } diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 6b8f79e5..2d59d0ff 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -329,7 +329,7 @@ const SidebarOptions = ({ user = null }) => (
From bea36d65a076a5ba2c5b849e6b12edafb68f63f4 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Sat, 18 May 2024 21:30:33 -0700 Subject: [PATCH 6/9] General Workspace settings UI updates (#1452) General Workspace settings updates 05/17 - Move vector count to vector database - Modify hint on LLM Temperature --- .../ChatSettings/ChatTemperatureSettings/index.jsx | 14 +++++++++----- .../WorkspaceSettings/GeneralAppearance/index.jsx | 2 -- .../VectorCount/index.jsx | 3 --- .../WorkspaceSettings/VectorDatabase/index.jsx | 6 +++++- 4 files changed, 14 insertions(+), 11 deletions(-) rename frontend/src/pages/WorkspaceSettings/{GeneralAppearance => VectorDatabase}/VectorCount/index.jsx (88%) diff --git a/frontend/src/pages/WorkspaceSettings/ChatSettings/ChatTemperatureSettings/index.jsx b/frontend/src/pages/WorkspaceSettings/ChatSettings/ChatTemperatureSettings/index.jsx index 5cbcdc3b..08565f58 100644 --- a/frontend/src/pages/WorkspaceSettings/ChatSettings/ChatTemperatureSettings/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/ChatSettings/ChatTemperatureSettings/index.jsx @@ -20,19 +20,23 @@ export default function ChatTemperatureSettings({ LLM Temperature

- This setting controls how "random" or dynamic your chat - responses will be. + This setting controls how "creative" your LLM responses will + be.
- The higher the number (1.0 maximum) the more random and incoherent. + The higher the number the more creative. For some models this can lead + to incoherent responses when set too high.
- Recommended: {defaults.temp} +
+ + Most LLMs have various acceptable ranges of valid values. Consult + your LLM provider for that information. +

e.target.blur()} defaultValue={workspace?.openAiTemp ?? defaults.temp} diff --git a/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx b/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx index 5e4053f0..101a3a9b 100644 --- a/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx @@ -2,7 +2,6 @@ import Workspace from "@/models/workspace"; import { castToType } from "@/utils/types"; import showToast from "@/utils/toast"; import { useEffect, useRef, useState } from "react"; -import VectorCount from "./VectorCount"; import WorkspaceName from "./WorkspaceName"; import SuggestedChatMessages from "./SuggestedChatMessages"; import DeleteWorkspace from "./DeleteWorkspace"; @@ -51,7 +50,6 @@ export default function GeneralInfo({ slug }) { onSubmit={handleUpdate} className="w-1/2 flex flex-col gap-y-6" > -

Number of vectors

-

- Total number of vectors in your vector database. -

{totalVectors}

diff --git a/frontend/src/pages/WorkspaceSettings/VectorDatabase/index.jsx b/frontend/src/pages/WorkspaceSettings/VectorDatabase/index.jsx index 0a9a0e87..97d63291 100644 --- a/frontend/src/pages/WorkspaceSettings/VectorDatabase/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/VectorDatabase/index.jsx @@ -6,6 +6,7 @@ import VectorDBIdentifier from "./VectorDBIdentifier"; import MaxContextSnippets from "./MaxContextSnippets"; import DocumentSimilarityThreshold from "./DocumentSimilarityThreshold"; import ResetDatabase from "./ResetDatabase"; +import VectorCount from "./VectorCount"; export default function VectorDatabase({ workspace }) { const [hasChanges, setHasChanges] = useState(false); @@ -38,7 +39,10 @@ export default function VectorDatabase({ workspace }) { onSubmit={handleUpdate} className="w-1/2 flex flex-col gap-y-6" > - +
+ + +
Date: Sun, 19 May 2024 19:47:47 +0200 Subject: [PATCH 7/9] Add API endpoint for updating pin status (#1449) --- server/endpoints/api/workspace/index.js | 67 +++++++++++++++++++++++++ server/swagger/openapi.json | 67 +++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index e7ea17d0..7cd2dd47 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -447,6 +447,73 @@ function apiWorkspaceEndpoints(app) { } ); + app.post( + "/v1/workspace/:slug/update-pin", + [validApiKey], + async (request, response) => { + /* + #swagger.tags = ['Workspaces'] + #swagger.description = 'Add or remove pin from a document in a workspace by its unique slug.' + #swagger.path = '/workspace/{slug}/update-pin' + #swagger.parameters['slug'] = { + in: 'path', + description: 'Unique slug of workspace to find', + required: true, + type: 'string' + } + #swagger.requestBody = { + description: 'JSON object with the document path and pin status to update.', + required: true, + type: 'object', + content: { + "application/json": { + example: { + docPath: "custom-documents/my-pdf.pdf-hash.json", + pinStatus: true + } + } + } + } + #swagger.responses[200] = { + description: 'OK', + content: { + "application/json": { + schema: { + type: 'object', + example: { + message: 'Pin status updated successfully' + } + } + } + } + } + #swagger.responses[404] = { + description: 'Document not found' + } + #swagger.responses[500] = { + description: 'Internal Server Error' + } + */ + try { + const { slug = null } = request.params; + const { docPath, pinStatus = false } = reqBody(request); + const workspace = await Workspace.get({ slug }); + + const document = await Document.get({ + workspaceId: workspace.id, + docpath: docPath, + }); + if (!document) return response.sendStatus(404).end(); + + await Document.update(document.id, { pinned: pinStatus }); + return response.status(200).json({ message: 'Pin status updated successfully' }).end(); + } catch (error) { + console.error("Error processing the pin status update:", error); + return response.status(500).end(); + } + } + ); + app.post( "/v1/workspace/:slug/chat", [validApiKey], diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index e0ee35a5..b98891c9 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1999,6 +1999,73 @@ } } } + },"/v1/workspace/{slug}/update-pin": { + "post": { + "tags": [ + "Workspaces" + ], + "description": "Add or remove pin from a document in a workspace by its unique slug.", + "parameters": [ + { + "name": "slug", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Unique slug of workspace to find" + }, + { + "name": "Authorization", + "in": "header", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "example": { + "message": "Pin status updated successfully" + } + } + } + } + }, + "404": { + "description": "Document not found" + }, + "500": { + "description": "Internal Server Error" + } + }, + "requestBody": { + "description": "JSON object with the document path and pin status to update.", + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "docPath": { + "type": "string", + "example": "custom-documents/my-pdf.pdf-hash.json" + }, + "pinStatus": { + "type": 
"boolean", + "example": true + } + } + } + } + } + } + } }, "/v1/workspace/{slug}/chat": { "post": { From 5bf4b4db58040e367c3ce5a76d4f482aa7dd1a11 Mon Sep 17 00:00:00 2001 From: Sean Hatfield Date: Sun, 19 May 2024 11:20:23 -0700 Subject: [PATCH 8/9] [FEAT] Add support for Voyage AI embedder (#1401) * add support for voyageai embedder * remove unneeded import * linting * Add ENV examples Update how chunks are processed for Voyage use correct langchain import Add data handling --------- Co-authored-by: Timothy Carambat --- docker/.env.example | 4 ++ .../VoyageAiOptions/index.jsx | 50 ++++++++++++++++++ .../src/media/embeddingprovider/voyageai.png | Bin 0 -> 20060 bytes .../EmbeddingPreference/index.jsx | 10 ++++ .../Steps/DataHandling/index.jsx | 9 ++++ server/.env.example | 4 ++ server/endpoints/api/workspace/index.js | 9 ++-- server/models/systemSettings.js | 3 ++ server/swagger/openapi.json | 22 ++++---- .../utils/EmbeddingEngines/voyageAi/index.js | 45 ++++++++++++++++ server/utils/helpers/index.js | 3 ++ server/utils/helpers/updateENV.js | 7 +++ 12 files changed, 150 insertions(+), 16 deletions(-) create mode 100644 frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx create mode 100644 frontend/src/media/embeddingprovider/voyageai.png create mode 100644 server/utils/EmbeddingEngines/voyageAi/index.js diff --git a/docker/.env.example b/docker/.env.example index 7fedf944..23789af4 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -124,6 +124,10 @@ GID='1000' # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx new file mode 100644 index 00000000..33ce693d --- /dev/null +++ b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx @@ -0,0 +1,50 @@ +export default function VoyageAiOptions({ settings }) { + return ( +
+
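+        {/* The key entered here is persisted server-side as VOYAGEAI_API_KEY via the
+            VoyageAiApiKey mapping in server/utils/helpers/updateENV.js. */}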
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/embeddingprovider/voyageai.png b/frontend/src/media/embeddingprovider/voyageai.png new file mode 100644 index 0000000000000000000000000000000000000000..4fd57eaac410eebba278abadf2a2ed882de7959c GIT binary patch literal 20060 zcmeIacRbhs{y(h9h|I_)t7Pv@Hlae<`|XYFO$edPD4E$LWN)&|D0|Du%!tga3jH3h z&pF@oJLi0_>-yvR{jT5b`r~vv-9FCSd%WJS=i~W!+}E@4ySJ4uU8KHjP)^2TqQR$XlCRIC;2BUAy*=XZ`JxEK&dX7-tVx`@cTK62*hIM?0V$-Q9Tj zxcUC&DlJjs?hoAU(f@Ygvi9!(_K5uh$)dUGwI$L|d7A*t=hozVGn#~A z(QXf{9T~Y8`6PLe3$%amPw(*8BmerQJpUIT{nyj}>6`w)ya}@I?ye6k{&6E5-MG#F zdc*^{d)%FEaTk*G7iOX8L{nLdb-$Wbj@^6p8%|L$PZ0_oYzW2}>Ep<%` z?dIg+ibDT$1^oRD2ow0YS8tg_VearNuwq>u)dlZ||gJ0dFM4FT^h_B*rJm zCn6>!%KMMk{PoNK_L6%}9w-~+Yf1C}|C?q0*W2}9U-e&a>;I;OcK)Yb zg{<;l+Z9<~_yw%5z`v|7JSHyd;cnyPDy@w+cVtvCcei?gwzrf-p8a3H`L}QPzg~;b zG5)0x|8a@l=Fb1E1mVfzf0c`q>pdqYdue%dM^AIc>sn|>OSCK6m0gnO-=FdSbiM!8 zivI06(3JkEoO%Ay)TGOTEzB`6R0&iSWbb*WejjVQ)XGTQS~h&@CH-{%aoBgfy`ERe zKQRLHPHQnS0^ObxVx0TW-~Zf$|G(4#Oz%H`zNbC(zO4AGt7S}}UY?QoJxR1PhwBr0 zv-)7#b6-h~xMeRW8o5s>%04-;)K_f1fBA~+O|cISA%S5t5i=J*YSgjtJ^y{%>dnSc z*U8(>PqfLZE8WNb>DP#QUym5WpSib3k2bhS-Gdl$gLclT4C28L?{OGqui?TE41ri_ z@HnF%#($phf95_oOQgEpxze^e=<^N(OP07j1G8Vu)y>UiYewuc1;zV;0itI#B1)&f z4y23)Gg?tWB@-K);?aEA&y+P?yawc?BeEp@l)N_4tyffH@+nLs!dXPRet$k#yh{a+>^v>e*Wb2)PbHu zm2|4wE|gU*!CT`={FCdeiye{Wj$gzDDr0pD70t{T&tqbe(9lG-{5joz&QhQ?LnBM9 z@NglOnS%rW^!JgnqhpEcT%F63Qq_Zz&ca`^O(ro{<)1Q2NK6+oTp{ZV48*{~!C}6B z9ak-ZQ}!9PkfaY0eR5V$MO0&>WQxH3elZr+x98g+91}HqaFD!dyIUY?F#kWi^w%WZ- zi8}6*IeQ!yhI@x19ySKo$BHQ5qpgK$rxu}dm52Zy)8;4v>wa(JfVoUrOiBW5fnw6r z`s+?9!VZlVt}C9hi7a?SLqoVkM70~o!#VZca!N{1JBFxNY2oY2%{4!gx_blm+0h5e z4?{W1FJ8T}WeTx9H&<eV>9py#FFDZtMQHksiTP z=YE%sajrFY@t72T+GSXqZUWDapNLJ(c##le|ApJ7rh%B-U@?atCXV7jO(KXn5~{@w3XH3aITsO zpKWF0$%$X8^rI=xZsVMxOywB*wV8T>a4}ipfQzE5$6F2C&dc589pw7d!EJ$2Q3OzK zqjtZ1^b<6T4TLbpg>Kb7-Qi}LML&UriYnak%exmzWLUd-fqYNDezi=M@FEO6V6^7@^r^f1eL1SDkxf@wMMWjq z|4~3d0PEoNhOQ!O^+<42V!ND5t0swNrX>%{+u|9R&02kZ}IB(Tnod42MAZMW{KDdEQtgMQvDcxeq(5xu9jq;18YgpDRICIL%i1G07=taP3qX#Sd5behFBK7 z`-M6kYCKI}KP{4A$=1R5-TU!lcejgy<0TIt`v$xNz0cZJD8k9FzCv>CA`Vj_`}^*- ziKfq#a@Yle(yZ1fY{uS_TUb~Wy~=%}!VP<6!)gtq%gD6tXti}f{9;1Abud01vWPE9qFGeqN}h{l+;4DmM*GE-cbJ+%WTJPh$PNj_B}t_ z&Nn-+d_*lfVOyQVhr)(ZoZ_+z)8Z5IPdwUghbSPY1UH#a`<>&~OReU+6>UI-qzS&6Z)EsUS zwpY%UqeA)Z$b&v-E8+_}OkGZO!@|P){GCRzY|?A)Q7CkSn6We|M=hs|5kI2GcK+N^ zS0_NGigw=ZWDnbQ90mr4m~<&$7;OVf1Zw>)TC6|#f6U&we#bL_ImMykvI9q4Z^QPoJt5@-^z$|mcqZn z-QE_A6YseU6`pYP9YUtgWTxywrKgsM(K7QBjeXk|4tLek{vN!gBx9 zUtwWkH*IXL!<`Ko9K5?cW0B^Hwn<5Ki;Ii9Svnz~fQByr_3Kwg78Wca+tD5OU8C-J z4qfM8`wuG5P3|mwKz6ihEMpHp{ll*$etVy;b(_%H4pIDRaa$#X(eng8qW)A`!LU%h z{Ei9p+TvsBUu!v{KKuC{2LahR$3Isyc44|?Y33-=-`HeWMmo_QnNut%o?cHX)XvdS z7e{I1;xK_+wm&S(^SHQ>m)Gtrb}s4Q|TS^$C1CFJG1*7ZEe25OO?V@aq>}aQX0Ha zUS5uihbOD4`9g8-N!azdGILB5-%TN+_&p**2I;50?HNTF1>;yY31C z)!4gwdSS5h&Ed|esHw@rt1VdXiYR!{MX<8il9}DUP3ZsY+cjD#pUdaYoy*tE!N$Hs z9fEX6c+vfm)$1M`;~gP*WV{deEG^M!b~+lfpRCVW@L*6;3fW$Md#_-z=89qh`t|GA zX3O32Rt~pES81FY7xpzgV8X5!mZ}#?MDd-R{1~Wv_=C-$%4VU;uKwbXF!Y{sj}0CB zsVY3L?O8>C|K>?sq62khuKC4TwUo)ohzRGOduXG2*9+4%4)Ql|Vx;@+sW$EptC72+ z@LXnF0%R&GDvTBv%UJF@OjXGi>Jd^;De8YxmrN|Z+fIgi@gl#kRi*Vn z=VMG9SQd*FC2|*))q#vI=;cFscNk*A@}wsADKw96$XZ8TWnwx%Ao+tp`sY`&NS-Ha zEQ?D^d{AMd1=?NAJh-d0Po6wE-XFQsN1fefcGJixb;MdP`$Ge45K>aoG60w_A5C=N zV_-af{5b!9AqCuJQs;c5a#YHanB%(&%Xi$o| zvM5DSK3?fd;g~49^5Uw~x8Yo5Uhn?iZkgVHf@j|3d&F%}WB<+O;9eLYx0`t*?3ncD zPy_;2y{)insZ;%y`bt(-Y_MZ2Exs(I%IA$Nd?1o(k7ZSB9G|&1wYWK53nM2CU^ony z3#|+e3cWUpO|Zz9pdH*aGt0E_5uzlxe*OAQS=n>^{QS>fzCA&eXX`bF8G&Bs#IOvAt4fX}^{6`LkYR 
zfG!}x{qK`x06wd|Z)=98h&U+?q)UC-a%Fdr-S=#~HFyqn;dYAfCBRwAR#tEBoBF`V z*Emc+{Wg;4`#QCPb7{UU7%3eL?U$bMH}3P=j$Y3*y%WoVR1R!kX!8k^_F4&OLA!AR zWWf0BuVvxZ6JIdr7NjKj*x%oIcKW;As*jq5m6g4cWg+kDOcJHV*xUPhE&ft$8m~gG znEDM})vgmQLH8tlu6MhJO+Xpg+pGRE+2@-tCp&xF+xvw(XTP@_mRU*D`k{(4$80rf zyuPNQR(g|@L|w8+Ol>)nk=iag&Z0Nnc zy~{26`PZ(-0+MPkGygyg9K`2fo!!oJEa=))8Vm+4Ez;&cXTE+v`b2J7skm&##*)6y z&0Q!gZGI12r2ABo5(OnK2MtYQeRkO8?CcB|7xyN#HDY>t!e>gsJ3Gz*QbBIQNTtas z9~mBg9v}Y%E(~_(^*5eF&9$sVR@Fwa`Mqo{#eb`Qm6)1({hsLt6kM5E+xhR)wF8#eEaIKgkA5K_8Uf-!)-=S` zJjALDxZrzTy3}#JyToVy9_!?IDduzILxIE1=}_pS%TDb-BO@cr-M?w2in^SK>Q{iC zKu0Rt8TBc6)^}Sbp;N@`E1hf23cPZMnuK`s8*r23wVW8|#df@xHQDLmT^$|f^R16<=9-&G zm9w|j)R?EV;bVtt91PVnr0K5RNml@L4r-vh%L%2m69g^fMsVPakneVSfNTia!|pU$Yk^HNq;R=$4a8(m3d-Bz(Kcy4@{ zyXF;xVu}H&qtr2BPF2!(Ufa=tJeFpge02uW5)(s#6P+(@{`GcXJWF6l{CAAc=$o?= z!BjVx8h9-ydyx{6d2xA}nHa!#gs7Z>dIE8_SR2d|*62B8>fc?8k+ImBZ|fcwUBJ9- zCy~f)90620)3DZ&&B}j>y`WENJ`dHnKaCnkCQ3 zC{b(U#WWP0dYQeVYBIXjq6m3egWqfj;GdZV0mCB^_P%>PF!*tD6 zph>p%lmb>tm&;kwl_W!I3b9*B<024^i{`275!LV(ps`zuR`Z9~# zFoXwz-oiVPFe^v5!!C+Ix=P1eT3x_7TXQWjft+cWOLMmUdCX<(ySvx{os$>u z=}R<~af7EKY{DlrL2EV$qX;>;$Kl(;A~A6j8Q6pzLSUwN5Dn z{BTotfW^N1{ti3%TzGi6)tQeF6gUi;?WUu2weAJnR2qSRKDcM4O^2_TzOH+8BJ(_y z!9PqflA>*H&h*#=-K>*_=ml$8zb1#(f>aW}kkGch~eeM(!--dQklKRe!n zVixuMo;0oG0M{fWL|&*{DvbYX2zaL81Fa`ym%@H@c6FipQ^mT!YlantVVjm)T$rkQ zuoTyhy(rDZ%)E<8B#(aW^QEX3mdXU2`4z!NF*=a8?x$5yQktSAhPm3C#D}R{V&tNK zc@OC{HbYFy0%o-Cy|0Au0T;MTuKB@uRk?MmLf>m~iblk-{a|B4>XnLW>BQ0(uelD` z8NDa77{s)+QNS`RK0d$7R;h2DnO;srCyppj3ZQoY_h`PDdjTDl1b@U}^EwN!Nat?O7?w zqZv1=4YYDJZ8&tMckNySV#+=~;=m_yTir7)zv=C6j~5$qVH4C-lk6P)+9*Z+KHS7G z<1Yhv$7N7;Q%>&uE5QfnTRq;h_S2A)KS`7D%9al$)T^=Af6}iHgWdY_=AD5T%*s1G zJT2EPIDZB6p*k~_o>6-?sa0^IK9`wOpx%sjAsDVJn$GQz>zKfVW6k28wDbJUlpJA_ ztnY-TIcvv9-zQPAyh>5L89wU;7fpewz?R15Y1$`+fvGNj*SyJ|onvZ~5eTJ`ZdObIUgjF-5a9J@6B=$Fu8UZPWU7gkH; z?&wPq(GZV;`Pg{87|ES-DYE8ed_4N#D7WYBkj?X?B;2&ytFiq0^6gjrPIkLMJmLM_ zS)F?-r|r2q_unVe;F*xrLw;;7Hkuku&_kV!nr* zFqiYN?uLhcsdGW0i#;EbEN4h~abqOm;jhv{*Qt8+9FvZgd zp}0Rlq0y4c^@}OR&_Bz|@HhBrMD20!2XNEWLq8Gw&w+q8lJp+F)hUjEp>grWix=D} zX_&15HRCzM78{;kqA{qJ19Awu3JQ?QZJw9K6%mXqBx|p06wck9;pXGxb2Qbt79{)U z=RknY^}$}MJ2Es)<8yCU%^ooi1Iayg*;puyTWTI)s^vz%xO7_=joNIxlX%*tSB2zD;v)vH&@eRh1B=$)E>vH{CC z6F!gbtz*-%H(M&fhB_5-UmyL%dFib<(`n~38ogS_G!)02{s?QWE|yy6rg0L&3V&OuB^ z=fFoH=V#LBUH&2X&E9*;4=|T@fzle%*#IuUdG!0xm~|7j8N`65Q~vJP&D4O^{xo_Q`G*oFTx&sydd$T1elg5obK(>( z(U{F0fbC4ZTQ|R$N1z@MS@pfkiHX2dEMN<=k+h%hqy#!RRAMYq8(#0s+ITQ-NP)wV zy+wsWC>0`FhUUx`LW2N7fdMdl^{=3ao9zYKc}yG}g+|$Iy&i5Wy_)@36ATZwzR)an z#prHzOgcNN4+}sSU9Ox5)kpc>z01Fjww;7+v=|DZkbRFFYwO}+%kA_E3?$_$-Z%B_ zj$_xs)ZxHyYiko!E5%@EXFrB65&yW>rcmRRplpNZCT)pfXVlMgPYI|jhHLXl<`_hs zZ%%r(-M`mavw2XguVCT@i)O2HjpSTCD0j=10wm{uL)%Tp;t1Ct2eii#H>1=@VCw%f zcaiMgTyRo-y)TE3EZ|vQ*RLum&0B!}IQ7a}7ujfQ%%ZYngRzq&yhZqnYlctNh2j(G8+|M>spm$|RJOVOjE_atUbl zR8bP-rcE~2I!v!PpNd~x?X2pqb^OB5UU;0QXE6#L`PSq@0Z9V)t7Wlh&K_uaR7f-W zC8rw8>$ai}T_ACzF^Z3XT2KW~uWChY;L|5fPKR~|@1{&lCD|091&WnLtzb*Q8Y3FX zNGXqc56ovaRb33!?KP7opZoddE{?s;2d77KhCgg|F3v73F3zv6TK}j+ z^bx>)gkJkmeEaKT_)yED7)h55Ml)KD7x6(Q6qi(!@jnq2c9^ns)zDAivm573>=#s= zTv}R+AISa=eL)GB+f`N4x4Na#pI%<$Kyy4ZDeXe!G+2$q>T{aou=8HoFESHb*^>0? 
zO%B;sTe|Y#Ky3on{M_JG%0Ef`G94kM5Ba(=Q3P;L5mFjIkPv+?YWMp3vvMO@G)U2y3de*23ju$ppU{BZ2gU&6*jsC6Y5?FG;Z0f=zp zf4+g=NeR3*T-;;*dWIUa)wgJF6d6dH<^T(TBUFy1_NUSuC8bMvIVNw^jRhQm$g}_r zM0oe$LA3$W%-J?JjnA#ph7!_gxw7(F&6tV_3SNd`pyj&c)=zTfO6dAnq2Xpb^Z*c> zih3iA+CW)y{;{GFso5UIC+Ijs30HCup8P_57vEe4XwTYEPF=vCX4TY4U}V5{(wKrk zdI6QLrjJ}0bT?7wc?>Ce^7yxGZ*HdP#(1Aq^A9iu(7v~=6k|@>Vy1;@V3iueSx9^ z=bmpmn$siyxVDVva*lIsEAMm#_!yBll!&l;x#_wtkQx` z4zQ13{~IPoF!m)m=&`B7sC2|4JUJ;MBnJBi8DO-c&aKu{mWl;}L4ddA2cl8kapOm1)z?Us#R)WId`=I7of4Q zHySDLM(3&GX`X=ebepS?MfG?glp*eM zS*~>#rToTtahj*GQ|rj~J+z!&>UgQC(~86qsI9C301o%H8 zCHP4iuLPn?sHD%n&1i}7w3Tocm}Gv3lh(KEVW*3bKR$o9hJnb)Nl8f;y>RR6>qQ^# zz3%JO%e_Tjy5Eu6$w>u@ zG*a;1iE26%xtBc;300c;@t}vhHXsXC7 zJW4Ju<=PVEBYAhG)hi8~d_0!F{J7A#koMNzsL6*sk;fFqo8{FV8xF^p<$QWu2cT<7Ez+-*7E3SzX@LhfxCBNAS> zCxd5(x6gexdcVLuikVz7T=pg}4_W;gj}J*MsLl7P20pIN9y;7(ZOdLXYRdqR7=J-b& z#rl>W;&h%{GpcIDnxC-fc%|E6R2igIN8r4A@!~N!N%0MjKUG8lY}MeCzZk;14_9Ec z%JmF)zH;0Jx*;XyTAbbe%l*%iQ|+7TQfbIuzRt_j73Eb*K=-CgOHg<8maj696Frrt$r50E4NbM+N zOoQK+%DrN7{%E4*tXlobECAwoNuvJO6_5TVK;X=9nnbM;)+o z;;Y6wfB($)(mluGL1#{k^jp6Y+^BMXYkj8oGc@z59KKD{r0!tu0`t3fj0R$ZiA~U2 zIUsE(g!7nkAJH~|NOyrZy91O;_oqp_MB~XG((mef`id6Q;ZuSUB|kbJe44c{4YkH> z8$g~ZfeO_;NCTE|-A%`VC!206qKt@;u*99t=pl(m(Zq)4-yDGDYdkze1qB7rh~9(J z>jInDezuVccx5|aQC^Srd+i+^Ss>FuU%v>7ILE1a8Xq)G;mp^s+b!98DL;(dk#P`W zj}`ES=s=u$R>Ew6e!`2iv}XVo>R&(7VVh9TthD18Bq@m! zT*=JL%+~ce1DcS+4Gs0|qA@g(1O&~(t&429%irnz=j;)pi~>35I(nqzc_TM7GSLC} z_ve;?mJX7Nu)1dNT_*6ma*IrP@cW;AQW=Nc<7)CZ!5GxcF9nr%ebrqK-D|dPCjD`E zrZ@KDbHChIqd(s(2aqrNb#*nR={WZS$nK#3n}aujNS21ZR;g~l8kfL9V=Lr^i3+8w zW2MC+^gcio)k*MIN6GsrrVLJOjpLUHDgmpwhTaNJ!!J3;8sHz*muOOvoQCGgM)|Z} zkFHD*$poIo%~Qw$NgN3Y8iq&vFD50f&BVeY)ru!okw-F4rI!W5fQ;-JXfID_H+n>X zB0%;MfT>hz?^4dn{afs7fxGB+(3Wk-i^4(pj_iV{0SL7AU*3@;qMq^Ua_SPW$zz!B z8oisTZb+p77x0*p0);X#Fpxjy2b*lI$e(HstSacI-XI1*`IugxDEn%P9W~(pYyX?g zR8_%9k9j0d%Zb)xpGF)jKR>DbQv#Q|+mtB86VoZudkp$FJ3wzOKvZ0t zC^gM3Ioi`IxFE&JNdVS$hs1Uxs{Cst2^kBTsZ^&?c#^pWjcngLqR&NXTc&r*6@peu}wo zbw}BJ03rVF;T7Wi-&oMDVf=MGdIN@PKxIV*cpF;7B2#*qg-+8jOKY4bxgw`^Hd-$Q`wZj1k;$n#9pmL?|jfQ(GvP$jvc z(sy8~9B$RP12@UMElutQy)!^FOhDWj&@O#KWSwxqx(-EG|WY4>!D(Uz8?T*B|CH7B0f z{8&`3;(PIs7gS{^0eN8E;I<4wpMX(z4hsHYo=!9rxuLzu0#=*1Vp*^*T(}C^GSEZ7 z5>W(8UrtUgb&rge*8Y%;k`fmLh!oWh2zWsSgEfB1>;4N}%?H+NtqepvL&^*tpa9v9 zVf9dc01CGYh%x6@{0-p_Hfd>kKyqwU-Ee21I=0+jSVYCwcN!6>xQnOtZK{2FBEC35 z+FxPznF;6<$ZL0l<~bLlMq8qh;sT+q|8?aHG!;ZIO7F&10``rFcn?m%V^Glb2yho? 
z>T+g~Zfw;{8Cu#VrtIv@i_zmqAPe^L@bIu?%|?buc6!0DSK+z}KR4XPD~0(i+HuvB z`4#H}bVp^sU3Y`b)!n;+aPido`uY^pZ#jED4h$&!`AH&PKwf-q#odAn#>X1J4mLtz zzTErSAFICyI*=$3h_MjfRGN_|Ta%StmYa}_Ba+&GE>F^Z2>lv1mSy?Ox+^TmJZu-6 z&t+N45|>DlO?|lDQfo@(Jy`SU=?21OpR_3$He4X8gZpdvz^k*M(SHt-l75;}nILu&z6Q1yGJ|8A>tn z$QW5zhz8IUt@V2Z1p6JQXFy|15cCDcdUO`g)u zrEqT*4Sq0`eE~X7FrYv^BO-P`V@NCpAij0vs(BRqWYcKS)gFFu3Dzpo>OrfuaMNKC zF93t=y8e`8c$-<-WO;FA_6AyaT-9@pFVwJd+f6~hj>*0G_88vMgt0?_aHk6 z(wlhNZT$_-?10nL)1kNbIS|jzRv+$LC|ulPL0-D+8a*W{EIeHE%*nwF4+F-(JbW`+ z@fHD55HOoTm0%qR)LUdxjg4iNfieQ>ua!_hx-36hsFHW1nG8WYAmY$-Ohdc}$^F3e zFf^_(f`|feaC2}}$(v2kds(_ZvN%!M(!PiGv=W{KK)H1!e#6kV05Xn%uuu3rSf(u8 zcVqU!_KyH3zu|Q-$V0$yAyZRnGfvtC+!3;51PszM-_Xuj{bCS8(qQ_-MM6UI_{ROW zb@rRJB28vdd;rGu!!dO6k3b@b_l_F=Z16tE(@vD%89WLYYB*3}tAiMEd`ta0upM=d z$cc~Gw8#?)Puy4mnnN1kGbA4S99~eNY)0NZGwlAl@pWaz0{R2nICpvB6e>mvyH!9y zx04c@Gq24cqaTPT6u+;M40_MEoKhA=b)mHT?jF$MiQEFl z%{ICN76osR-K`v{TkFKH!aRkXo0SaofR8;r^8WsCpywh)L-H*d$n0FkFx9FjM-22u zSL>V?&POxIZ0$S>dHNKzX$s*9G6>5jvA zV)^koKN%6NjQ@<~&u7PH(3OOfje+BXCv)TBw6QA0OdUMAA&UbS-*S4GqD+&hl+*U5 z!K)`#=y2=HbQa5v=xT|8*OpPC zfP-3?nWL4E@nV_Q1#Vz6R)u9 zB5ICCweLVn{+J{C0y;fdBX<|Giu7ASQ35VtQ(^cfCyu-_N4lS82k&J=X274be1qy6 z5OU(|w*pWEKa{yH-)cOZvg=kehEfm4A(G1(x=#0svj>K4aD_(!$G6!g3F$v4ZiUIu zMMad}nKpWpAaSFf9u(fLoN2vlN48nSyEqm+c&TT7QsTk+dU1^c&K{LJce;Wu;Hwvj zzhBpC@O0>q%pbFj50P9nwVUj)ZxajEDeM5r6G8xJ9ir_1Qqz`bXy7OiqC90Fv=j&o zYYE6uWztQB88--}A#S!+-z%|uhY^e0+lX6&Y|zGQol(gL-#=|ImR+9Mu~v>{p%rt* zLSmC!mjFV7RDtNpN6vW9077}qKEwgFjAF@0;kO3R9!N~^nzaVHE_dduTM!;U2OdXE zOdJU4Kwmu`@8*Nd-Q8V?Q80n+*h={Uf*;QzOuR^wz=8*1j@Pb^y2D_Hj27w=LD5&P zlR=CTWK>E%ix9OQi%Xg~Q2#Yaq}p*VU}$sSslH)BZ+|*Fwvh`$IfMZ+N4{M)hqnTc z{TLERLq+-mDQB~LCfW&TgolAf&iA{C@-8HF?B|;40K=6}PjIP{Lc|e44l_-@6K@{K zz~p?%rt!ASY1pK_^*Tg^poQx6EN^db-_7GHVhUl0Ov_W9LLt6dw|+jy8BvU~295ex z5L|Od~rAauGYdKG%WTk{q?rq;ZZJkZuDf2;++D{=0it<8$>S)lfDi*WIE5|ff<7cta~t|`<*%IkZagK7`vDlKHDI)46G`hNf_sHm&EH&9M>*K~;~-@Qu=d%}404#CBy0)a!pRGMI?fU}?0WPH}W zNrMGoKcnAS&OEvP0WhEgp)}lU5Tu?bC+nAN#Kn8b8+92@y|{z-4mS$i_Lwn3Q-zys zY-~Yp(I7~_VqqtKk!s<&RpJ^$Ep!plL_`a<@jD@=1RwzbQla|c5w`}&*1OPX zCd*%pruqfSl*_^T{oG(OifjlWOxgzK2oP6}L1=i2zzj@x1lx~|k&TBvdsZ#>9Nn8_ z^8gSTg~+foE81ah9U{4d1jd{J%ux<+^7B7hx-vH>qNzZ;lXG{k8qpTl#p!u2 zzuu5tovHhv|6s13MRg4#xJW=Qb|ZmX9`X%C;AC{n6)Q>RKzq9YgSMif829BFgz?m^ zY;sF4VL;XcAE}9uhH|3*hsgzgbaX1@SFZRz`}uhUCg9o0Lr_>gan_9hLkB-K*7(8O zFX3S+Nx}}V#;o&&8z5WyWx-#&bYi;d7H?;xt=XD(5aG!S8!O1YP@E36G6eAgjf2hJ z&M9gIMmwYrU4BPG_AY>Rt#6pqI00>GWn}>q!7+v(VjG#U97b^(?wPSH=HR2RjTI&u zcB5lVd*TfrZB5tSDPPC98=DeEE%a7hH&B4s@B<+wZ|h>Kk|Z<$od`a zNBcv{IzRd%2|dA|IEEhfWjG85S-WwwU;UnboZ<-$J$*EoYVQ<#DN)_cpK9*nmsAq_ zPauZsvc1!6qu5lf({_y~vnnau?NmK0vniMw#n`d>a&?3d`uM{L+MlfiRv^Tv!IAo$ zbxvNf4828OBIvu3;Z|a~$Q(s;la+-fsqw?2u^UU>@RzVQ2WWFy6B|A5mEzQfC|*kY zvrDKbtS@vBZXt%5T4iV%qK5<<0qFctSpo&)6189}Fo*anD#nBdA0Hy_@Gz_HRjc;^ zEETo1Vw0UTP9%^KnAXBUoMUBZ3Do5D9q@J*7Q~!llZ}g_5-E4S?_;LM+j$pcR>^}= zZ5@riR5#$9i>7~m=_YeW3her5d|a z+j4pdhK(Z~#-xD!K!3mIx)9hWkokk;NuQ3w{@M`cU#7>w_s4us0bc3%bzhsg+7-jl zH6Zin`&|i=H{;Mm4WDseS?PoyP>AK>RgzsKykbMc$<1A&;^KE6^tRa8AOPS=dZO)a zw0ss=Al5dydU6x1af5b9)apuedHg9LrK=$HY^gvDgRKO^zcDr~HAK&5E1t8m)9oMg~JCMD8B`Skb?9ncRf}!cX*&ObZ0@Bro#HDlLF_ihyggPUYK; zO>}lXKJP|)^ro;1ONMi|FJn7yZRuX>qgKo1`%q9kyou^q44Iy z_Mgp;TyK{WQ%BA;2Nu;Jc+ITO z1{Y#a>R78_?7-fc2mT}n8`8qS_41HSM~NF8Oqq)(yc3qBlFbjEG9;FIP?gVsV|Lh@ z;q{9lC>7$LdGRg~QK<9#SuuIr4bmQNhMj_Ha2HxmmU$2%s;_z})eXtIEKkjc@}c-s zK+HmNN)&UfL!Jp-h6s;cc>e^y+rBA#;3gIu9dz^crgP5Te*(d-OKH(fG;CzZ^=j8H z)uuRL@{fp&bb1!F-h8sBhUC@*AB1)$xpWlBT6VP~?N>l2`~U>i!TEwQy~l9-#r6q-HUTm;XnG?W 
z13oh3?)l9~&YOUwS5(9ahcXzYr0C#_)IoY25W@4+)F=pGE~+aw89DvWcdSFR_a@IQO04=@-|W8_MI>ufT#4CJ!yZfiUFBr!N$+ zJ&;&EdDd!0lf@Sr{k^)+p0I#FFwoY=_+bV@U5! z*Az1;43oD;yFP=$wtiS#`$8Ai% zs}J=9VF+a-BcpoH52)U{-Cm$J&XD9%hj{pf3l|1$t8euN*~mcX5+(^5ocsy`=!LLi zr{T#OAV_)0F=4pbg}{aeE$OSvV^{FrRogf( z?7WR3kxK(nXf@rp--3pC?n{$zZLcZ`s#Bv7kmTyVuj}QEEAE|?)FO^iFnY`Z(6R9( zd)pwvUBg7$cpRWkfEarrM*89DC3#S~paM%d=d==y;Z8v62sxAjhO{Q|I>Z)-Lj~Qq zLNYQ85W`WqeOp*8)F)XBkUDZ~1Pn8B0JD44_?AJGUY=^+IjiM631|!b7t{+AT1k$pMrm4YM?G{qQd1G)RkrFpf>EscR zF3B@OyuuMkKAMfOk`fcozc@~cXcbciS?N0*9)1d~&KyWpSafuakG-w!+dg5Yevh-0 zpL-P31t;uSbQqxDgd(Kl^|uilLGSs;ID>#)OR0pWgZNO?oq!;-0mQ@2M0k7{5H9wW zfnlb^jEor2JJ2j9)I*G8XSZRUU@{^xRmAjQ+M69DiI0n$2Z(V?UqS2TaE=Y+JYaQQ zQ}EIHO6KNQR|YbiTRIH#cXQ$k{4*)a3otG~iSS7@OhNchkjG zpr(+9B~ABPXFu8bK%`gad=28?IGH#*={P;K;ESm`}#HIeIjCvrTLq& zEQojv3#GHC9%oH61)Us_xIxvuI8W>Nqn`;|+DO_DHn{g_2-hM;`yQ7an#Ax#hfU$n zb!@-&R)qtB%X8LnOtyN$}^1mV!+zD{;&ZMP9$Rdf5&tBX%6 z*Q*eTe*(=1VYKEIa0L+QTU3-1)|KnXgA#=l zZBTIVP?kJS?r~q;zNtXBDB*z>xkg<5&V|o?@ zzWO3^6v$-}3kon4dMi5zX-E<-oRS=V@fJtTRyP7do7#DV0O4bhN&ujU%+v7j2Fc}d z$gqH|0d6%Y2=KqVMn**h$2}4DH6g#8M*ZDf20)#;nXI;L^o&;o2Mdc)SXk#%g6=O9 zNH~GiXxvO*QOOP`X+VGDdN(#D$xb#mDex7;ffe(k7 zMDW+nWeTuU(yae_zvw0zc{EyN?Z-o@8}$ zxM@QN`mgFI-y2S;q3xy*wbHF^H>ByrGDJovGHwm3d0mwIj2Glg)O>>lYvIyK!QcP= zhWPG{C^!|%h%5K_KmVe`|JVPr#jyqu9fFD{J12%m9SYHgDGrPHl!OQ55 zK}jr*L4Zq#fgM7KfhmU*crFXGHSj;b{?BdsKTr;jF)mKiKimqjNQTnHP*uFGP$Fj* G^gjUK3_@f8 literal 0 HcmV?d00001 diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx index 8f234b5a..5a0f51c1 100644 --- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx @@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions"; @@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions"; import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions"; import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions"; +import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions"; import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -78,6 +81,13 @@ const EMBEDDERS = [ options: (settings) => , description: "Run powerful embedding models from Cohere.", }, + { + name: "Voyage AI", + value: "voyageai", + logo: VoyageAiLogo, + options: (settings) => , + description: "Run powerful embedding models from Voyage AI.", + }, ]; export default function GeneralEmbeddingPreference() { diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index b6ae8cb2..35358636 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -28,6 +28,8 @@ import LanceDbLogo from 
"@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; import { useNavigate } from "react-router-dom"; @@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = { ], logo: CohereLogo, }, + voyageai: { + name: "Voyage AI", + description: [ + "Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.", + ], + logo: VoyageAiLogo, + }, }; export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) { diff --git a/server/.env.example b/server/.env.example index 4be9ab75..e38250be 100644 --- a/server/.env.example +++ b/server/.env.example @@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 7cd2dd47..cbbf1f23 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -498,15 +498,18 @@ function apiWorkspaceEndpoints(app) { const { slug = null } = request.params; const { docPath, pinStatus = false } = reqBody(request); const workspace = await Workspace.get({ slug }); - + const document = await Document.get({ workspaceId: workspace.id, docpath: docPath, }); if (!document) return response.sendStatus(404).end(); - + await Document.update(document.id, { pinned: pinStatus }); - return response.status(200).json({ message: 'Pin status updated successfully' }).end(); + return response + .status(200) + .json({ message: "Pin status updated successfully" }) + .end(); } catch (error) { console.error("Error processing the pin status update:", error); return response.status(500).end(); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index c8e239f1..a5bb6a23 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -426,6 +426,9 @@ const SystemSettings = { // Cohere API Keys CohereApiKey: !!process.env.COHERE_API_KEY, CohereModelPref: process.env.COHERE_MODEL_PREF, + + // VoyageAi API Keys + VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY, }; }, diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index b98891c9..8616943c 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1999,7 +1999,8 @@ } } } - },"/v1/workspace/{slug}/update-pin": { + }, + "/workspace/{slug}/update-pin": { "post": { "tags": [ "Workspaces" @@ -2037,6 +2038,9 @@ } } }, + "403": { + "description": "Forbidden" + }, "404": { "description": "Document not found" }, @@ -2047,20 +2051,12 @@ "requestBody": { "description": "JSON object with the document path and pin status to update.", "required": true, + "type": "object", "content": { "application/json": { - "schema": { - "type": "object", - "properties": { - "docPath": { - "type": "string", - "example": "custom-documents/my-pdf.pdf-hash.json" - }, - "pinStatus": { - "type": "boolean", - "example": true - } - } + "example": { + "docPath": 
"custom-documents/my-pdf.pdf-hash.json", + "pinStatus": true } } } diff --git a/server/utils/EmbeddingEngines/voyageAi/index.js b/server/utils/EmbeddingEngines/voyageAi/index.js new file mode 100644 index 00000000..b25d3208 --- /dev/null +++ b/server/utils/EmbeddingEngines/voyageAi/index.js @@ -0,0 +1,45 @@ +class VoyageAiEmbedder { + constructor() { + if (!process.env.VOYAGEAI_API_KEY) + throw new Error("No Voyage AI API key was set."); + + const { + VoyageEmbeddings, + } = require("@langchain/community/embeddings/voyage"); + const voyage = new VoyageEmbeddings({ + apiKey: process.env.VOYAGEAI_API_KEY, + }); + + this.voyage = voyage; + this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct"; + + // Limit of how many strings we can process in a single pass to stay with resource or network limits + this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches + this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding + } + + async embedTextInput(textInput) { + const result = await this.voyage.embedDocuments( + Array.isArray(textInput) ? textInput : [textInput], + { modelName: this.model } + ); + return result || []; + } + + async embedChunks(textChunks = []) { + try { + const embeddings = await this.voyage.embedDocuments(textChunks, { + modelName: this.model, + batchSize: this.batchSize, + }); + return embeddings; + } catch (error) { + console.error("Voyage AI Failed to embed:", error); + throw error; + } + } +} + +module.exports = { + VoyageAiEmbedder, +}; diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index d9a1ba09..e60202a6 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() { case "cohere": const { CohereEmbedder } = require("../EmbeddingEngines/cohere"); return new CohereEmbedder(); + case "voyageai": + const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi"); + return new VoyageAiEmbedder(); default: return new NativeEmbedder(); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 48c98e95..40154163 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -350,6 +350,12 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // VoyageAi Options + VoyageAiApiKey: { + envKey: "VOYAGEAI_API_KEY", + checks: [isNotEmpty], + }, + // Whisper (transcription) providers WhisperProvider: { envKey: "WHISPER_PROVIDER", @@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") { "ollama", "lmstudio", "cohere", + "voyageai", ]; return supported.includes(input) ? 
null From 47056702382ed208c02a3b26d9440b5ed2ec4e80 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Sun, 19 May 2024 11:51:08 -0700 Subject: [PATCH 9/9] Update Github connector with more obvious alert of Github connector limitations (#1457) --- .../Connectors/Github/index.jsx | 118 ++++++++++++------ 1 file changed, 78 insertions(+), 40 deletions(-) diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx index de6ed77e..00b1cc46 100644 --- a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx @@ -3,7 +3,7 @@ import System from "@/models/system"; import showToast from "@/utils/toast"; import pluralize from "pluralize"; import { TagsInput } from "react-tag-input-component"; -import { Warning } from "@phosphor-icons/react"; +import { Info, Warning } from "@phosphor-icons/react"; import { Tooltip } from "react-tooltip"; const DEFAULT_BRANCHES = ["main", "master"]; @@ -92,45 +92,7 @@ export default function GithubOptions() {
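+            {/* A personal access token is optional, but without one the GitHub API's
+                rate limits cap how many files can be collected. */}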

Github Access Token

{" "}

optional - {!accessToken && ( - - )} - -

- Without a{" "} - e.stopPropagation()} - > - Personal Access Token - - , the GitHub API may limit the number of files that - can be collected due to rate limits. You can{" "} - e.stopPropagation()} - > - create a temporary Access Token - {" "} - to avoid this issue. -

- +

@@ -180,6 +142,7 @@ export default function GithubOptions() {

+