diff --git a/.vscode/settings.json b/.vscode/settings.json index dde2d134..459f57fc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,6 @@ { "cSpell.words": [ + "Ollama", "openai", "Qdrant", "Weaviate" diff --git a/README.md b/README.md index 44e0557f..36127cb3 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ Some cool features of AnythingLLM - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) - [Anthropic ClaudeV2](https://www.anthropic.com/) - [Google Gemini Pro](https://ai.google.dev/) +- [Ollama (chat models)](https://ollama.ai/) - [LM Studio (all models)](https://lmstudio.ai) - [LocalAi (all models)](https://localai.io/) diff --git a/docker/.env.example b/docker/.env.example index cc9fa06f..0db90aa2 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -35,6 +35,11 @@ GID='1000' # LOCAL_AI_MODEL_TOKEN_LIMIT=4096 # LOCAL_AI_API_KEY="sk-123abc" +# LLM_PROVIDER='ollama' +# OLLAMA_BASE_PATH='http://host.docker.internal:11434' +# OLLAMA_MODEL_PREF='llama2' +# OLLAMA_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx new file mode 100644 index 00000000..a2034bf7 --- /dev/null +++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx @@ -0,0 +1,120 @@ +import { useEffect, useState } from "react"; +import System from "@/models/system"; + +export default function OllamaLLMOptions({ settings }) { + const [basePathValue, setBasePathValue] = useState( + settings?.OllamaLLMBasePath + ); + const [basePath, setBasePath] = useState(settings?.OllamaLLMBasePath); + + return ( +
+
+
+ + setBasePathValue(e.target.value)} + onBlur={() => setBasePath(basePathValue)} + /> +
+ +
+ + e.target.blur()} + defaultValue={settings?.OllamaLLMTokenLimit} + required={true} + autoComplete="off" + /> +
+
+
+ ); +} + +function OllamaLLMModelSelection({ settings, basePath = null }) { + const [customModels, setCustomModels] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + if (!basePath) { + setCustomModels([]); + setLoading(false); + return; + } + setLoading(true); + const { models } = await System.customModels("ollama", null, basePath); + setCustomModels(models || []); + setLoading(false); + } + findCustomModels(); + }, [basePath]); + + if (loading || customModels.length == 0) { + return ( +
+ + +
+ ); + } + + return ( +
+ + +
+ ); +} diff --git a/frontend/src/media/llmprovider/ollama.png b/frontend/src/media/llmprovider/ollama.png new file mode 100644 index 00000000..2a898a6e Binary files /dev/null and b/frontend/src/media/llmprovider/ollama.png differ diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index a0169fe1..0cecaa4d 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -8,6 +8,7 @@ import OpenAiLogo from "@/media/llmprovider/openai.png"; import AzureOpenAiLogo from "@/media/llmprovider/azure.png"; import AnthropicLogo from "@/media/llmprovider/anthropic.png"; import GeminiLogo from "@/media/llmprovider/gemini.png"; +import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import PreLoader from "@/components/Preloader"; @@ -19,6 +20,7 @@ import LMStudioOptions from "@/components/LLMSelection/LMStudioOptions"; import LocalAiOptions from "@/components/LLMSelection/LocalAiOptions"; import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions"; import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions"; +import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; export default function GeneralLLMPreference() { const [saving, setSaving] = useState(false); @@ -163,6 +165,15 @@ export default function GeneralLLMPreference() { image={LocalAiLogo} onClick={updateLLMChoice} /> + {!window.location.hostname.includes("useanything.com") && ( )} + {llmChoice === "ollama" && ( + + )} {llmChoice === "native" && ( )} diff --git a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx index cd63d74d..81b93c5d 100644 --- a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx @@ -5,6 +5,7 @@ import OpenAiLogo from "@/media/llmprovider/openai.png"; import AzureOpenAiLogo from "@/media/llmprovider/azure.png"; import AnthropicLogo from "@/media/llmprovider/anthropic.png"; import GeminiLogo from "@/media/llmprovider/gemini.png"; +import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; @@ -61,6 +62,13 @@ const LLM_SELECTION_PRIVACY = { ], logo: LocalAiLogo, }, + ollama: { + name: "Ollama", + description: [ + "Your model and chats are only accessible on the machine running Ollama models", + ], + logo: OllamaLogo, + }, native: { name: "Custom Llama Model", description: [ diff --git a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx index f877e31d..850dea3c 100644 --- a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx +++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx @@ -4,6 +4,7 @@ import OpenAiLogo from "@/media/llmprovider/openai.png"; import AzureOpenAiLogo from "@/media/llmprovider/azure.png"; import AnthropicLogo from "@/media/llmprovider/anthropic.png"; import GeminiLogo from "@/media/llmprovider/gemini.png"; +import OllamaLogo from 
"@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import System from "@/models/system"; @@ -16,6 +17,7 @@ import LMStudioOptions from "@/components/LLMSelection/LMStudioOptions"; import LocalAiOptions from "@/components/LLMSelection/LocalAiOptions"; import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions"; import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions"; +import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; function LLMSelection({ nextStep, prevStep, currentStep }) { const [llmChoice, setLLMChoice] = useState("openai"); @@ -124,13 +126,24 @@ function LLMSelection({ nextStep, prevStep, currentStep }) { onClick={updateLLMChoice} /> + {!window.location.hostname.includes("useanything.com") && ( + + )}
{llmChoice === "openai" && } @@ -143,6 +156,7 @@ function LLMSelection({ nextStep, prevStep, currentStep }) { )} {llmChoice === "localai" && } + {llmChoice === "ollama" && } {llmChoice === "native" && }
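For reference, the OllamaLLMOptions panel wired up above reduces to three form fields whose `name` attributes match the KEY_MAPPING entries added in server/utils/helpers/updateENV.js: OllamaLLMBasePath, OllamaLLMModelPref (populated from System.customModels("ollama", null, basePath), which the server resolves against the Ollama /api/tags endpoint), and OllamaLLMTokenLimit. A rough sketch of that shape follows; the field names and settings keys come from this diff, while the component name, labels, layout, and the `models` prop are illustrative only and not the PR's literal markup.

    // Illustrative sketch only -- not the markup shipped in this PR.
    // Field names and settings keys are taken from the diff; everything else is assumed.
    function OllamaOptionsSketch({ settings, models = [], onBasePathChange, onBasePathBlur }) {
      return (
        <div className="flex w-full gap-4">
          <label>
            Ollama Base URL
            <input
              type="url"
              name="OllamaLLMBasePath"
              placeholder="http://127.0.0.1:11434"
              defaultValue={settings?.OllamaLLMBasePath}
              required={true}
              autoComplete="off"
              onChange={onBasePathChange} // keep the draft value in state
              onBlur={onBasePathBlur} // commit it so the model list refetches
            />
          </label>

          <label>
            Chat Model Selection
            {/* Options come from System.customModels("ollama", null, basePath) -> GET /api/tags */}
            <select
              name="OllamaLLMModelPref"
              required={true}
              defaultValue={settings?.OllamaLLMModelPref}
            >
              {models.map((model) => (
                <option key={model.id} value={model.id}>
                  {model.id}
                </option>
              ))}
            </select>
          </label>

          <label>
            Token context window
            {/* Saved to OLLAMA_MODEL_TOKEN_LIMIT; must be a non-zero number per updateENV.js */}
            <input
              type="number"
              name="OllamaLLMTokenLimit"
              placeholder="4096"
              defaultValue={settings?.OllamaLLMTokenLimit}
              required={true}
              autoComplete="off"
            />
          </label>
        </div>
      );
    }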
diff --git a/server/.env.example b/server/.env.example index f73e0e08..07abed62 100644 --- a/server/.env.example +++ b/server/.env.example @@ -32,6 +32,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # LOCAL_AI_MODEL_TOKEN_LIMIT=4096 # LOCAL_AI_API_KEY="sk-123abc" +# LLM_PROVIDER='ollama' +# OLLAMA_BASE_PATH='http://host.docker.internal:11434' +# OLLAMA_MODEL_PREF='llama2' +# OLLAMA_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index b5dfeb70..a66f93e1 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -126,6 +126,20 @@ const SystemSettings = { AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, } : {}), + + ...(llmProvider === "ollama" + ? { + OllamaLLMBasePath: process.env.OLLAMA_BASE_PATH, + OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF, + OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT, + + // For embedding credentials when ollama is selected. + OpenAiKey: !!process.env.OPEN_AI_KEY, + AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT, + AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY, + AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, + } + : {}), ...(llmProvider === "native" ? { NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF, diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js new file mode 100644 index 00000000..3aa58f76 --- /dev/null +++ b/server/utils/AiProviders/ollama/index.js @@ -0,0 +1,208 @@ +const { chatPrompt } = require("../../chats"); + +// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md +class OllamaAILLM { + constructor(embedder = null) { + if (!process.env.OLLAMA_BASE_PATH) + throw new Error("No Ollama Base Path was set."); + + this.basePath = process.env.OLLAMA_BASE_PATH; + this.model = process.env.OLLAMA_MODEL_PREF; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + if (!embedder) + throw new Error( + "INVALID OLLAMA SETUP. No embedding engine has been set. Go to instance settings and set up an embedding interface to use Ollama as your LLM." + ); + this.embedder = embedder; + } + + streamingEnabled() { + return "streamChat" in this && "streamGetChatCompletion" in this; + } + + // Ensure the user set a value for the token limit + // and if undefined - assume 4096 window. 
+  promptWindowLimit() {
+    const limit = process.env.OLLAMA_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Ollama token context limit was set.");
+    return Number(limit);
+  }
+
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}
+Context:
+    ${contextTexts
+      .map((text, i) => {
+        return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+      })
+      .join("")}`,
+    };
+    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
+  }
+
+  async isSafe(_input = "") {
+    // Not implemented so must be stubbed
+    return { safe: true, reasons: [] };
+  }
+
+  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    const textResponse = await fetch(`${this.basePath}/api/chat`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: this.model,
+        stream: false,
+        options: {
+          temperature: Number(workspace?.openAiTemp ?? 0.7),
+        },
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      }),
+    })
+      .then((res) => {
+        if (!res.ok)
+          throw new Error(`Ollama:sendChat ${res.status} ${res.statusText}`);
+        return res.json();
+      })
+      .then((data) => data?.message?.content)
+      .catch((e) => {
+        console.error(e);
+        throw new Error(`Ollama::sendChat failed with: ${e.message}`);
+      });
+
+    if (!textResponse.length)
+      throw new Error(`Ollama::sendChat text response was empty.`);
+
+    return textResponse;
+  }
+
+  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    const response = await fetch(`${this.basePath}/api/chat`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: this.model,
+        stream: true,
+        options: {
+          temperature: Number(workspace?.openAiTemp ?? 0.7),
+        },
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      }),
+    }).catch((e) => {
+      console.error(e);
+      throw new Error(`Ollama:streamChat ${e.message}`);
+    });
+
+    return { type: "ollamaStream", response };
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    const textResponse = await fetch(`${this.basePath}/api/chat`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: this.model,
+        messages,
+        stream: false,
+        options: {
+          temperature,
+        },
+      }),
+    })
+      .then((res) => {
+        if (!res.ok)
+          throw new Error(
+            `Ollama:getChatCompletion ${res.status} ${res.statusText}`
+          );
+        return res.json();
+      })
+      .then((data) => data?.message?.content)
+      .catch((e) => {
+        console.error(e);
+        throw new Error(
+          `Ollama::getChatCompletion failed with: ${e.message}`
+        );
+      });
+
+    if (!textResponse.length)
+      throw new Error(`Ollama::getChatCompletion text response was empty.`);
+
+    return textResponse;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const response = await fetch(`${this.basePath}/api/chat`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: this.model,
+        stream: true,
+        messages,
+        options: {
+          temperature,
+        },
+      }),
+    }).catch((e) => {
+      console.error(e);
+      throw new Error(`Ollama:streamGetChatCompletion ${e.message}`);
+    });
+
+    return { type: "ollamaStream", response };
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+module.exports = {
+  OllamaAILLM,
+};
diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
index 5bdb7a1f..b0dc9186 100644
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@@ -199,6 +199,7 @@ async function streamEmptyEmbeddingChat({
   return;
 }
 
+// TODO: Refactor this implementation
 function handleStreamResponses(response, stream, responseProps) {
   const { uuid = uuidv4(), sources = [] } = responseProps;
 
@@ -231,6 +232,34 @@ function handleStreamResponses(response, stream, responseProps) {
     });
   }
 
+  if (stream?.type === "ollamaStream") {
+    return new Promise(async (resolve) => {
+      let fullText = "";
+      for await (const dataChunk of stream.response.body) {
+        const chunk = JSON.parse(Buffer.from(dataChunk).toString());
+        fullText += chunk.message.content;
+        writeResponseChunk(response, {
+          uuid,
+          sources: [],
+          type: "textResponseChunk",
+          textResponse: chunk.message.content,
+          close: false,
+          error: false,
+        });
+      }
+
+      writeResponseChunk(response, {
+        uuid,
+        sources,
+        type: "textResponseChunk",
+        textResponse: "",
+        close: true,
+        error: false,
+      });
+      resolve(fullText);
+    });
+  }
+
   // If stream is not a regular OpenAI Stream (like if using native model)
   // we can just iterate the stream content instead.
if (!stream.hasOwnProperty("data")) { diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index 3b4397c3..5bd7b299 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -1,4 +1,4 @@ -const SUPPORT_CUSTOM_MODELS = ["openai", "localai", "native-llm"]; +const SUPPORT_CUSTOM_MODELS = ["openai", "localai", "ollama", "native-llm"]; async function getCustomModels(provider = "", apiKey = null, basePath = null) { if (!SUPPORT_CUSTOM_MODELS.includes(provider)) @@ -9,6 +9,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await openAiModels(apiKey); case "localai": return await localAIModels(basePath, apiKey); + case "ollama": + return await ollamaAIModels(basePath, apiKey); case "native-llm": return nativeLLMModels(); default: @@ -59,6 +61,37 @@ async function localAIModels(basePath = null, apiKey = null) { return { models, error: null }; } +async function ollamaAIModels(basePath = null, _apiKey = null) { + let url; + try { + new URL(basePath); + if (basePath.split("").slice(-1)?.[0] === "/") + throw new Error("BasePath Cannot end in /!"); + url = basePath; + } catch { + return { models: [], error: "Not a valid URL." }; + } + + const models = await fetch(`${url}/api/tags`) + .then((res) => { + if (!res.ok) + throw new Error(`Could not reach Ollama server! ${res.status}`); + return res.json(); + }) + .then((data) => data?.models || []) + .then((models) => + models.map((model) => { + return { id: model.name }; + }) + ) + .catch((e) => { + console.error(e); + return []; + }); + + return { models, error: null }; +} + function nativeLLMModels() { const fs = require("fs"); const path = require("path"); diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 115df400..bde5e8a0 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -43,6 +43,9 @@ function getLLMProvider() { case "localai": const { LocalAiLLM } = require("../AiProviders/localAi"); return new LocalAiLLM(embedder); + case "ollama": + const { OllamaAILLM } = require("../AiProviders/ollama"); + return new OllamaAILLM(embedder); case "native": const { NativeLLM } = require("../AiProviders/native"); return new NativeLLM(embedder); diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index fe4f4f5c..11278f97 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -81,6 +81,19 @@ const KEY_MAPPING = { checks: [], }, + OllamaLLMBasePath: { + envKey: "OLLAMA_BASE_PATH", + checks: [isNotEmpty, validOllamaLLMBasePath], + }, + OllamaLLMModelPref: { + envKey: "OLLAMA_MODEL_PREF", + checks: [], + }, + OllamaLLMTokenLimit: { + envKey: "OLLAMA_MODEL_TOKEN_LIMIT", + checks: [nonZero], + }, + // Native LLM Settings NativeLLMModelPref: { envKey: "NATIVE_LLM_MODEL_PREF", @@ -208,6 +221,17 @@ function validLLMExternalBasePath(input = "") { } } +function validOllamaLLMBasePath(input = "") { + try { + new URL(input); + if (input.split("").slice(-1)?.[0] === "/") + return "URL cannot end with a slash"; + return null; + } catch { + return "Not a valid URL"; + } +} + function supportedLLM(input = "") { return [ "openai", @@ -216,6 +240,7 @@ function supportedLLM(input = "") { "gemini", "lmstudio", "localai", + "ollama", "native", ].includes(input); }
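Taken together, enabling the new provider only requires the four environment variables shown in the .env examples (LLM_PROVIDER='ollama', OLLAMA_BASE_PATH, OLLAMA_MODEL_PREF, OLLAMA_MODEL_TOKEN_LIMIT) plus a separately configured embedder, since OllamaAILLM refuses to start without one. The provider itself only touches two documented Ollama endpoints: GET /api/tags for model discovery (ollamaAIModels) and POST /api/chat for completions (sendChat/getChatCompletion and the streaming variants). Below is a minimal standalone sketch of those two calls, handy for sanity-checking a base path before pointing AnythingLLM at it; it assumes Node 18+ (global fetch, which the server code already relies on) and that the llama2 model has been pulled on the Ollama host.

    // Standalone sanity check -- not part of this diff. Assumes an Ollama server is
    // reachable at OLLAMA_BASE_PATH (no trailing slash) and "llama2" has been pulled.
    const basePath = process.env.OLLAMA_BASE_PATH || "http://127.0.0.1:11434";
    const model = process.env.OLLAMA_MODEL_PREF || "llama2";

    async function main() {
      // Model discovery, mirroring ollamaAIModels() in server/utils/helpers/customModels.js
      const tags = await fetch(`${basePath}/api/tags`).then((res) => res.json());
      console.log(
        "Models Ollama reports:",
        (tags?.models || []).map((m) => m.name)
      );

      // Non-streaming chat, mirroring OllamaAILLM.getChatCompletion()
      const data = await fetch(`${basePath}/api/chat`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model,
          stream: false,
          options: { temperature: 0.7 },
          messages: [
            { role: "system", content: "You are a helpful assistant." },
            { role: "user", content: "Reply with a single short sentence." },
          ],
        }),
      }).then((res) => res.json());

      console.log("Reply:", data?.message?.content);
    }

    main().catch(console.error);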