diff --git a/README.md b/README.md
index e15f7ff6..bf50f209 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ Some cool features of AnythingLLM
 - [Groq](https://groq.com/)
 - [Cohere](https://cohere.com/)
 - [KoboldCPP](https://github.com/LostRuins/koboldcpp)
+- [LiteLLM](https://github.com/BerriAI/litellm)
 - [Text Generation Web UI](https://github.com/oobabooga/text-generation-webui)
 
 **Embedder models:**
diff --git a/docker/.env.example b/docker/.env.example
index 70059ea5..7fedf944 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -82,6 +82,12 @@ GID='1000'
 # GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096
 # GENERIC_OPEN_AI_API_KEY=sk-123abc
 
+# LLM_PROVIDER='litellm'
+# LITE_LLM_MODEL_PREF='gpt-3.5-turbo'
+# LITE_LLM_MODEL_TOKEN_LIMIT=4096
+# LITE_LLM_BASE_PATH='http://127.0.0.1:4000'
+# LITE_LLM_API_KEY='sk-123abc'
+
 # LLM_PROVIDER='cohere'
 # COHERE_API_KEY=
 # COHERE_MODEL_PREF='command-r'
diff --git a/frontend/src/components/LLMSelection/LiteLLMOptions/index.jsx b/frontend/src/components/LLMSelection/LiteLLMOptions/index.jsx
new file mode 100644
index 00000000..6199ba26
--- /dev/null
+++ b/frontend/src/components/LLMSelection/LiteLLMOptions/index.jsx
@@ -0,0 +1,148 @@
+import { useEffect, useState } from "react";
+import System from "@/models/system";
+
+export default function LiteLLMOptions({ settings }) {
+  const [basePathValue, setBasePathValue] = useState(settings?.LiteLLMBasePath);
+  const [basePath, setBasePath] = useState(settings?.LiteLLMBasePath);
+  const [apiKeyValue, setApiKeyValue] = useState(settings?.LiteLLMAPIKey);
+  const [apiKey, setApiKey] = useState(settings?.LiteLLMAPIKey);
+
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Base URL
+          </label>
+          <input
+            type="url"
+            name="LiteLLMBasePath"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="http://127.0.0.1:4000"
+            defaultValue={settings?.LiteLLMBasePath}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+            onChange={(e) => setBasePathValue(e.target.value)}
+            onBlur={() => setBasePath(basePathValue)}
+          />
+        </div>
+        <LiteLLMModelSelection
+          settings={settings}
+          basePath={basePath}
+          apiKey={apiKey}
+        />
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Token context window
+          </label>
+          <input
+            type="number"
+            name="LiteLLMTokenLimit"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="4096"
+            min={1}
+            onScroll={(e) => e.target.blur()}
+            defaultValue={settings?.LiteLLMTokenLimit}
+            required={true}
+            autoComplete="off"
+          />
+        </div>
+      </div>
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            API Key <p className="!text-xs !italic !font-thin">optional</p>
+          </label>
+          <input
+            type="password"
+            name="LiteLLMAPIKey"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="sk-mysecretkey"
+            defaultValue={settings?.LiteLLMAPIKey ? "*".repeat(20) : ""}
+            autoComplete="off"
+            spellCheck={false}
+            onChange={(e) => setApiKeyValue(e.target.value)}
+            onBlur={() => setApiKey(apiKeyValue)}
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
+
+function LiteLLMModelSelection({ settings, basePath = null, apiKey = null }) {
+  const [customModels, setCustomModels] = useState([]);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      if (!basePath) {
+        setCustomModels([]);
+        setLoading(false);
+        return;
+      }
+      setLoading(true);
+      const { models } = await System.customModels(
+        "litellm",
+        typeof apiKey === "boolean" ? null : apiKey,
+        basePath
+      );
+      setCustomModels(models || []);
+      setLoading(false);
+    }
+    findCustomModels();
+  }, [basePath, apiKey]);
+
+  if (loading || customModels.length == 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Chat Model Selection
+        </label>
+        <select
+          name="LiteLLMModelPref"
+          disabled={true}
+          className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            {!!basePath
+              ? "-- loading available models --"
+              : "-- waiting for URL --"}
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-4">
+        Chat Model Selection
+      </label>
+      <select
+        name="LiteLLMModelPref"
+        required={true}
+        className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {customModels.length > 0 && (
+          <optgroup label="Your loaded models">
+            {customModels.map((model) => {
+              return (
+                <option
+                  key={model.id}
+                  value={model.id}
+                  selected={settings?.LiteLLMModelPref === model.id}
+                >
+                  {model.id}
+                </option>
+              );
+            })}
+          </optgroup>
+        )}
+      </select>
+    </div>
+  );
+}
diff --git a/frontend/src/media/llmprovider/litellm.png b/frontend/src/media/llmprovider/litellm.png
new file mode 100644
index 00000000..da4faf5b
Binary files /dev/null and b/frontend/src/media/llmprovider/litellm.png differ
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 4db20123..5c4b0b2f 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -21,6 +21,7 @@ import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
 import TextGenWebUILogo from "@/media/llmprovider/text-generation-webui.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
+import LiteLLMLogo from "@/media/llmprovider/litellm.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -38,12 +39,13 @@ import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions";
 import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions";
 import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions";
 import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions";
+import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
+import TextGenWebUIOptions from "@/components/LLMSelection/TextGenWebUIOptions";
+import LiteLLMOptions from "@/components/LLMSelection/LiteLLMOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
 import CTAButton from "@/components/lib/CTAButton";
-import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
-import TextGenWebUIOptions from "@/components/LLMSelection/TextGenWebUIOptions";
 
 export const AVAILABLE_LLM_PROVIDERS = [
   {
@@ -186,6 +188,14 @@ export const AVAILABLE_LLM_PROVIDERS = [
     description: "Run Cohere's powerful Command models.",
     requiredConfig: ["CohereApiKey"],
   },
+  {
+    name: "LiteLLM",
+    value: "litellm",
+    logo: LiteLLMLogo,
+    options: (settings) => <LiteLLMOptions settings={settings} />,
+    description: "Run LiteLLM's OpenAI compatible proxy for various LLMs.",
+    requiredConfig: ["LiteLLMBasePath"],
+  },
   {
     name: "Generic OpenAI",
     value: "generic-openai",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index 5c6b3798..b6ae8cb2 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -17,6 +17,8 @@ import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
 import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
 import TextGenWebUILogo from "@/media/llmprovider/text-generation-webui.png";
+import LiteLLMLogo from "@/media/llmprovider/litellm.png";
+
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
 import AstraDBLogo from "@/media/vectordbs/astraDB.png";
@@ -168,6 +170,13 @@ export const LLM_SELECTION_PRIVACY = {
     ],
     logo: CohereLogo,
   },
+  litellm: {
+    name: "LiteLLM",
+    description: [
+      "Your model and chats are only accessible on the server running LiteLLM",
+    ],
+    logo: LiteLLMLogo,
+  },
 };
 
 export const VECTOR_DB_PRIVACY = {
diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
index 966253f4..25b46522 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@@ -17,6 +17,8 @@ import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
 import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
 import TextGenWebUILogo from "@/media/llmprovider/text-generation-webui.png";
+import LiteLLMLogo from "@/media/llmprovider/litellm.png";
+
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -34,14 +36,15 @@ import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions";
 import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions";
 import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions";
 import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions";
+import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
+import TextGenWebUIOptions from "@/components/LLMSelection/TextGenWebUIOptions";
+import LiteLLMOptions from "@/components/LLMSelection/LiteLLMOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import System from "@/models/system";
 import paths from "@/utils/paths";
 import showToast from "@/utils/toast";
 import { useNavigate } from "react-router-dom";
-import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
-import TextGenWebUIOptions from "@/components/LLMSelection/TextGenWebUIOptions";
 
 const TITLE = "LLM Preference";
 const DESCRIPTION =
@@ -164,6 +167,13 @@ const LLMS = [
     options: (settings) => <CohereAiOptions settings={settings} />,
     description: "Run Cohere's powerful Command models.",
   },
+  {
+    name: "LiteLLM",
+    value: "litellm",
+    logo: LiteLLMLogo,
+    options: (settings) => <LiteLLMOptions settings={settings} />,
+    description: "Run LiteLLM's OpenAI compatible proxy for various LLMs.",
+  },
   {
     name: "Generic OpenAI",
     value: "generic-openai",
diff --git a/server/.env.example b/server/.env.example
index 5e0233b7..4be9ab75 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -79,6 +79,12 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096
 # GENERIC_OPEN_AI_API_KEY=sk-123abc
 
+# LLM_PROVIDER='litellm'
+# LITE_LLM_MODEL_PREF='gpt-3.5-turbo'
+# LITE_LLM_MODEL_TOKEN_LIMIT=4096
+# LITE_LLM_BASE_PATH='http://127.0.0.1:4000'
+# LITE_LLM_API_KEY='sk-123abc'
+
 # LLM_PROVIDER='cohere'
 # COHERE_API_KEY=
 # COHERE_MODEL_PREF='command-r'
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 7b4f21ee..68d1d0dd 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -408,6 +408,12 @@
       TextGenWebUITokenLimit: process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT,
       TextGenWebUIAPIKey: !!process.env.TEXT_GEN_WEB_UI_API_KEY,
 
+      // LiteLLM Keys
+      LiteLLMModelPref: process.env.LITE_LLM_MODEL_PREF,
+      LiteLLMTokenLimit: process.env.LITE_LLM_MODEL_TOKEN_LIMIT,
+      LiteLLMBasePath: process.env.LITE_LLM_BASE_PATH,
+      LiteLLMApiKey: !!process.env.LITE_LLM_API_KEY,
+
       // Generic OpenAI Keys
       GenericOpenAiBasePath: process.env.GENERIC_OPEN_AI_BASE_PATH,
       GenericOpenAiModelPref: process.env.GENERIC_OPEN_AI_MODEL_PREF,
diff --git a/server/utils/AiProviders/liteLLM/index.js b/server/utils/AiProviders/liteLLM/index.js
new file mode 100644
index 00000000..5973826c
--- /dev/null
+++ b/server/utils/AiProviders/liteLLM/index.js
@@ -0,0 +1,178 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const { v4: uuidv4 } = require("uuid");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");
+
+class LiteLLM {
+  constructor(embedder = null, modelPreference = null) {
+    const { OpenAI: OpenAIApi } = require("openai");
+    if (!process.env.LITE_LLM_BASE_PATH)
+      throw new Error(
+        "LiteLLM must have a valid base path to use for the api."
+      );
+
+    this.basePath = process.env.LITE_LLM_BASE_PATH;
+    this.openai = new OpenAIApi({
+      baseURL: this.basePath,
+      apiKey: process.env.LITE_LLM_API_KEY ?? null,
+    });
+    this.model = modelPreference ?? process.env.LITE_LLM_MODEL_PREF ?? null;
+    this.maxTokens = process.env.LITE_LLM_MODEL_TOKEN_LIMIT ?? 1024;
+    if (!this.model) throw new Error("LiteLLM must have a valid model set.");
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    if (!embedder)
+      console.warn(
+        "No embedding provider defined for LiteLLM - falling back to NativeEmbedder for embedding!"
+      );
+    this.embedder = !embedder ? new NativeEmbedder() : embedder;
+    this.defaultTemp = 0.7;
+    this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
+  }
+
+  log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  // Ensure the user set a value for the token limit
+  // and if undefined - assume 4096 window.
+  promptWindowLimit() {
+    const limit = process.env.LITE_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No token context limit was set.");
+    return Number(limit);
+  }
+
+  // Short circuit since we have no idea if the model is valid or not
+  // in pre-flight for generic endpoints
+  isValidChatCompletionModel(_modelName = "") {
+    return true;
+  }
+
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
+  }
+
+  async isSafe(_input = "") {
+    // Not implemented so must be stubbed
+    return { safe: true, reasons: [] };
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    const result = await this.openai.chat.completions
+      .create({
+        model: this.model,
+        messages,
+        temperature,
+        max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
+      })
+      .catch((e) => {
+        throw new Error(e.response.data.error.message);
+      });
+
+    if (!result.hasOwnProperty("choices") || result.choices.length === 0)
+      return null;
+    return result.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const streamRequest = await this.openai.chat.completions.create({
+      model: this.model,
+      stream: true,
+      messages,
+      temperature,
+      max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
+    });
+    return streamRequest;
+  }
+
+  handleStream(response, stream, responseProps) {
+    const { uuid = uuidv4(), sources = [] } = responseProps;
+
+    return new Promise(async (resolve) => {
+      let fullText = "";
+
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
+      for await (const chunk of stream) {
+        const message = chunk?.choices?.[0];
+        const token = message?.delta?.content;
+
+        if (token) {
+          fullText += token;
+          writeResponseChunk(response, {
+            uuid,
+            sources: [],
+            type: "textResponseChunk",
+            textResponse: token,
+            close: false,
+            error: false,
+          });
+        }
+
+        // LiteLLM does not give a finish reason in stream until the final chunk
+        if (message.finish_reason || message.finish_reason === "stop") {
+          writeResponseChunk(response, {
+            uuid,
+            sources,
+            type: "textResponseChunk",
+            textResponse: "",
+            close: true,
+            error: false,
+          });
+          response.removeListener("close", handleAbort);
+          resolve(fullText);
+        }
+      }
+    });
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+module.exports = {
+  LiteLLM,
+};
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index caf5a77c..31a3eb2c 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -16,6 +16,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "openrouter",
   "lmstudio",
   "koboldcpp",
+  "litellm",
   "elevenlabs-tts",
 ];
 
@@ -44,6 +45,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await getLMStudioModels(basePath);
     case "koboldcpp":
       return await getKoboldCPPModels(basePath);
+    case "litellm":
+      return await liteLLMModels(basePath, apiKey);
     case "elevenlabs-tts":
       return await getElevenLabsModels(apiKey);
     default:
@@ -164,6 +167,25 @@ async function localAIModels(basePath = null, apiKey = null) {
   return { models, error: null };
 }
 
+async function liteLLMModels(basePath = null, apiKey = null) {
+  const { OpenAI: OpenAIApi } = require("openai");
+  const openai = new OpenAIApi({
+    baseURL: basePath || process.env.LITE_LLM_BASE_PATH,
+    apiKey: apiKey || process.env.LITE_LLM_API_KEY || null,
+  });
+  const models = await openai.models
+    .list()
+    .then((results) => results.data)
+    .catch((e) => {
+      console.error(`LiteLLM:listModels`, e.message);
+      return [];
+    });
+
+  // API key was successful so let's save it for future uses
+  if (models.length > 0 && !!apiKey) process.env.LITE_LLM_API_KEY = apiKey;
+  return { models, error: null };
+}
+
 async function getLMStudioModels(basePath = null) {
   try {
     const { OpenAI: OpenAIApi } = require("openai");
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 72fbfc6e..dde8d7ab 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -86,6 +86,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "cohere":
       const { CohereLLM } = require("../AiProviders/cohere");
       return new CohereLLM(embedder, model);
+    case "litellm":
+      const { LiteLLM } = require("../AiProviders/liteLLM");
+      return new LiteLLM(embedder, model);
     case "generic-openai":
       const { GenericOpenAiLLM } = require("../AiProviders/genericOpenAi");
       return new GenericOpenAiLLM(embedder, model);
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index e2b1d2e1..8630d85a 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -160,6 +160,24 @@ const KEY_MAPPING = {
     checks: [],
   },
 
+  // LiteLLM Settings
+  LiteLLMModelPref: {
+    envKey: "LITE_LLM_MODEL_PREF",
+    checks: [isNotEmpty],
+  },
+  LiteLLMTokenLimit: {
+    envKey: "LITE_LLM_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
+  LiteLLMBasePath: {
+    envKey: "LITE_LLM_BASE_PATH",
+    checks: [isValidURL],
+  },
+  LiteLLMApiKey: {
+    envKey: "LITE_LLM_API_KEY",
+    checks: [],
+  },
+
   // Generic OpenAI InferenceSettings
   GenericOpenAiBasePath: {
     envKey: "GENERIC_OPEN_AI_BASE_PATH",
@@ -469,6 +487,7 @@ function supportedLLM(input = "") {
     "koboldcpp",
     "textgenwebui",
     "cohere",
+    "litellm",
     "generic-openai",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
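
Reviewer note: for anyone who wants to exercise the new provider outside the UI, the sketch below wires together the pieces this diff adds — the LITE_LLM_* variables from .env.example, the "litellm" case in getLLMProvider, and the LiteLLM class's constructPrompt/getChatCompletion methods. It is a minimal smoke test, assuming a LiteLLM proxy is already listening on http://127.0.0.1:4000 with a gpt-3.5-turbo model configured, and that the snippet is run from the server/ directory so the relative require resolves; the file name is illustrative and not part of this PR.

// scripts/litellm-smoke-test.js (illustrative name, not included in this PR)
// Exercise the new LiteLLM provider without going through the frontend.
process.env.LLM_PROVIDER = "litellm";
process.env.LITE_LLM_BASE_PATH = "http://127.0.0.1:4000"; // your running LiteLLM proxy
process.env.LITE_LLM_MODEL_PREF = "gpt-3.5-turbo"; // any model the proxy exposes
process.env.LITE_LLM_MODEL_TOKEN_LIMIT = "4096";
// process.env.LITE_LLM_API_KEY = "sk-123abc"; // only if the proxy enforces a key

const { getLLMProvider } = require("./utils/helpers");

(async () => {
  // Resolves to the LiteLLM class via the "litellm" case added in helpers/index.js.
  const llm = getLLMProvider({ provider: "litellm" });

  // constructPrompt builds the [system, ...history, user] message array.
  const messages = llm.constructPrompt({
    systemPrompt: "You are a helpful assistant.",
    contextTexts: [],
    chatHistory: [],
    userPrompt: "Say hello in one short sentence.",
  });

  // Non-streaming completion; returns the assistant text (or null on an empty response).
  const reply = await llm.getChatCompletion(messages, { temperature: 0.7 });
  console.log("LiteLLM reply:", reply);
})();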