Add Ollama as LLM provider option (#494)

* Add support for Ollama as LLM provider resolves #493
2024-10-03 17:30:11 +02:00 · 2023-12-27 17:21:47 -08:00 · 2023-12-27 17:21:47 -08:00 · e0a0a8976d
commit e0a0a8976d
parent 24227e48a7
15 changed files with 486 additions and 6 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -1,5 +1,6 @@
 {
  "cSpell.words": [
    "Ollama",
    "openai",
    "Qdrant",
    "Weaviate"
--- a/README.md
+++ b/README.md
@ -59,6 +59,7 @@ Some cool features of AnythingLLM
 - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
 - [Anthropic ClaudeV2](https://www.anthropic.com/)
 - [Google Gemini Pro](https://ai.google.dev/)
 - [Ollama (chat models)](https://ollama.ai/)
 - [LM Studio (all models)](https://lmstudio.ai)
 - [LocalAi (all models)](https://localai.io/)
--- a/docker/.env.example
+++ b/docker/.env.example
@ -35,6 +35,11 @@ GID='1000'
 # LOCAL_AI_MODEL_TOKEN_LIMIT=4096
 # LOCAL_AI_API_KEY="sk-123abc"
 # LLM_PROVIDER='ollama'
 # OLLAMA_BASE_PATH='http://host.docker.internal:11434'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 ###########################################
 ######## Embedding API SELECTION ##########
 ###########################################
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@ -0,0 +1,120 @@
 import { useEffect, useState } from "react";
 import System from "@/models/system";
 export default function OllamaLLMOptions({ settings }) {
  const [basePathValue, setBasePathValue] = useState(
    settings?.OllamaLLMBasePath
  );
  const [basePath, setBasePath] = useState(settings?.OllamaLLMBasePath);
  return (
    <div className="w-full flex flex-col gap-y-4">
      <div className="w-full flex items-center gap-4">
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            Ollama Base URL
          </label>
          <input
            type="url"
            name="OllamaLLMBasePath"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="http://127.0.0.1:11434"
            defaultValue={settings?.OllamaLLMBasePath}
            required={true}
            autoComplete="off"
            spellCheck={false}
            onChange={(e) => setBasePathValue(e.target.value)}
            onBlur={() => setBasePath(basePathValue)}
          />
        </div>
        <OllamaLLMModelSelection settings={settings} basePath={basePath} />
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            Token context window
          </label>
          <input
            type="number"
            name="OllamaLLMTokenLimit"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="4096"
            min={1}
            onScroll={(e) => e.target.blur()}
            defaultValue={settings?.OllamaLLMTokenLimit}
            required={true}
            autoComplete="off"
          />
        </div>
      </div>
    </div>
  );
 }
 function OllamaLLMModelSelection({ settings, basePath = null }) {
  const [customModels, setCustomModels] = useState([]);
  const [loading, setLoading] = useState(true);
  useEffect(() => {
    async function findCustomModels() {
      if (!basePath) {
        setCustomModels([]);
        setLoading(false);
        return;
      }
      setLoading(true);
      const { models } = await System.customModels("ollama", null, basePath);
      setCustomModels(models || []);
      setLoading(false);
    }
    findCustomModels();
  }, [basePath]);
  if (loading || customModels.length == 0) {
    return (
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Chat Model Selection
        </label>
        <select
          name="OllamaLLMModelPref"
          disabled={true}
          className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
        >
          <option disabled={true} selected={true}>
            {!!basePath
              ? "-- loading available models --"
              : "-- waiting for URL --"}
          </option>
        </select>
      </div>
    );
  }
  return (
    <div className="flex flex-col w-60">
      <label className="text-white text-sm font-semibold block mb-4">
        Chat Model Selection
      </label>
      <select
        name="OllamaLLMModelPref"
        required={true}
        className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
      >
        {customModels.length > 0 && (
          <optgroup label="Your loaded models">
            {customModels.map((model) => {
              return (
                <option
                  key={model.id}
                  value={model.id}
                  selected={settings.OllamaLLMModelPref === model.id}
                >
                  {model.id}
                </option>
              );
            })}
          </optgroup>
        )}
      </select>
    </div>
  );
 }
--- a/frontend/src/media/llmprovider/ollama.png
+++ b/frontend/src/media/llmprovider/ollama.png
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@ -8,6 +8,7 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
 import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
 import AnthropicLogo from "@/media/llmprovider/anthropic.png";
 import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import PreLoader from "@/components/Preloader";
@ -19,6 +20,7 @@ import LMStudioOptions from "@/components/LLMSelection/LMStudioOptions";
 import LocalAiOptions from "@/components/LLMSelection/LocalAiOptions";
 import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions";
 import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 export default function GeneralLLMPreference() {
  const [saving, setSaving] = useState(false);
@ -163,6 +165,15 @@ export default function GeneralLLMPreference() {
                  image={LocalAiLogo}
                  onClick={updateLLMChoice}
                />
                <LLMProviderOption
                  name="Ollama"
                  value="ollama"
                  link="ollama.ai"
                  description="Run LLMs locally on your own machine."
                  checked={llmChoice === "ollama"}
                  image={OllamaLogo}
                  onClick={updateLLMChoice}
                />
                {!window.location.hostname.includes("useanything.com") && (
                  <LLMProviderOption
                    name="Custom Llama Model"
@ -193,6 +204,9 @@ export default function GeneralLLMPreference() {
                {llmChoice === "localai" && (
                  <LocalAiOptions settings={settings} showAlert={true} />
                )}
                {llmChoice === "ollama" && (
                  <OllamaLLMOptions settings={settings} />
                )}
                {llmChoice === "native" && (
                  <NativeLLMOptions settings={settings} />
                )}
--- a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx
@ -5,6 +5,7 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
 import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
 import AnthropicLogo from "@/media/llmprovider/anthropic.png";
 import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import ChromaLogo from "@/media/vectordbs/chroma.png";
@ -61,6 +62,13 @@ const LLM_SELECTION_PRIVACY = {
    ],
    logo: LocalAiLogo,
  },
  ollama: {
    name: "Ollama",
    description: [
      "Your model and chats are only accessible on the machine running Ollama models",
    ],
    logo: OllamaLogo,
  },
  native: {
    name: "Custom Llama Model",
    description: [
--- a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx
@ -4,6 +4,7 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
 import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
 import AnthropicLogo from "@/media/llmprovider/anthropic.png";
 import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import System from "@/models/system";
@ -16,6 +17,7 @@ import LMStudioOptions from "@/components/LLMSelection/LMStudioOptions";
 import LocalAiOptions from "@/components/LLMSelection/LocalAiOptions";
 import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions";
 import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 function LLMSelection({ nextStep, prevStep, currentStep }) {
  const [llmChoice, setLLMChoice] = useState("openai");
@ -124,13 +126,24 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {
              onClick={updateLLMChoice}
            />
            <LLMProviderOption
-              name="Custom Llama Model"
+              name="Ollama"
-              value="native"
+              value="ollama"
-              description="Use a downloaded custom Llama model for chatting on this AnythingLLM instance."
+              link="ollama.ai"
-              checked={llmChoice === "native"}
+              description="Run LLMs locally on your own machine."
-              image={AnythingLLMIcon}
+              checked={llmChoice === "ollama"}
              image={OllamaLogo}
              onClick={updateLLMChoice}
            />
            {!window.location.hostname.includes("useanything.com") && (
              <LLMProviderOption
                name="Custom Llama Model"
                value="native"
                description="Use a downloaded custom Llama model for chatting on this AnythingLLM instance."
                checked={llmChoice === "native"}
                image={AnythingLLMIcon}
                onClick={updateLLMChoice}
              />
            )}
          </div>
          <div className="mt-4 flex flex-wrap gap-4 max-w-[752px]">
            {llmChoice === "openai" && <OpenAiOptions settings={settings} />}
@ -143,6 +156,7 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {
              <LMStudioOptions settings={settings} />
            )}
            {llmChoice === "localai" && <LocalAiOptions settings={settings} />}
            {llmChoice === "ollama" && <OllamaLLMOptions settings={settings} />}
            {llmChoice === "native" && <NativeLLMOptions settings={settings} />}
          </div>
        </div>
--- a/server/.env.example
+++ b/server/.env.example
@ -32,6 +32,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # LOCAL_AI_MODEL_TOKEN_LIMIT=4096
 # LOCAL_AI_API_KEY="sk-123abc"
 # LLM_PROVIDER='ollama'
 # OLLAMA_BASE_PATH='http://host.docker.internal:11434'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 ###########################################
 ######## Embedding API SELECTION ##########
 ###########################################
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@ -126,6 +126,20 @@ const SystemSettings = {
            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
          }
        : {}),
      ...(llmProvider === "ollama"
        ? {
            OllamaLLMBasePath: process.env.OLLAMA_BASE_PATH,
            OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
            OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
            // For embedding credentials when ollama is selected.
            OpenAiKey: !!process.env.OPEN_AI_KEY,
            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
            AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
          }
        : {}),
      ...(llmProvider === "native"
        ? {
            NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF,
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@ -0,0 +1,208 @@
 const { chatPrompt } = require("../../chats");
 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 /**
  * LLM provider backed by an Ollama server's `/api/chat` endpoint.
  *
  * Configuration is read from the environment:
  *  - OLLAMA_BASE_PATH (required): server URL without a trailing slash.
  *  - OLLAMA_MODEL_PREF: model name sent with every request.
  *  - OLLAMA_MODEL_TOKEN_LIMIT: context window size, defaults to 4096.
  *
  * Embedding calls are delegated to the embedder passed to the constructor.
  */
 class OllamaAILLM {
  /**
   * @param {object|null} embedder - Object exposing `embedTextInput` and `embedChunks`.
   * @throws {Error} If OLLAMA_BASE_PATH is unset, the token limit is not numeric,
   *   or no embedder is supplied.
   */
  constructor(embedder = null) {
    if (!process.env.OLLAMA_BASE_PATH)
      throw new Error("No Ollama Base Path was set.");
    this.basePath = process.env.OLLAMA_BASE_PATH;
    this.model = process.env.OLLAMA_MODEL_PREF;
    // Split of the context window between history, system prompt and user prompt.
    const windowLimit = this.promptWindowLimit();
    this.limits = {
      history: windowLimit * 0.15,
      system: windowLimit * 0.15,
      user: windowLimit * 0.7,
    };
    if (!embedder)
      throw new Error(
        "INVALID OLLAMA SETUP. No embedding engine has been set. Go to instance settings and set up an embedding interface to use Ollama as your LLM."
      );
    this.embedder = embedder;
  }

  /** @returns {boolean} true when both streaming methods exist on this instance. */
  streamingEnabled() {
    return "streamChat" in this && "streamGetChatCompletion" in this;
  }

  /**
   * Context window size in tokens, from OLLAMA_MODEL_TOKEN_LIMIT (default 4096).
   * @returns {number}
   * @throws {Error} If the configured value is not a number.
   */
  promptWindowLimit() {
    const limit = process.env.OLLAMA_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || Number.isNaN(Number(limit)))
      throw new Error("No Ollama token context limit was set.");
    return Number(limit);
  }

  /** Model names are not validated for this provider; always resolves true. */
  async isValidChatCompletionModel(_ = "") {
    return true;
  }

  /**
   * Builds the message array: a system message (system prompt plus numbered
   * context snippets), then the chat history, then the user's prompt.
   * @returns {Array<{role: string, content: string}>}
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}
 Context:
    ${contextTexts
      .map((text, i) => {
        return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
      })
      .join("")}`,
    };
    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
  }

  async isSafe(_input = "") {
    // Not implemented so must be stubbed
    return { safe: true, reasons: [] };
  }

  /**
   * Sends a workspace chat (non-streaming) and returns the reply text.
   * @returns {Promise<string>}
   * @throws {Error} On network failure, non-2xx status, or an empty reply.
   */
  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    const messages = await this.compressMessages(
      {
        systemPrompt: chatPrompt(workspace),
        userPrompt: prompt,
        chatHistory,
      },
      rawHistory
    );
    return await ollamaChatText(
      this.basePath,
      {
        model: this.model,
        stream: false,
        options: {
          temperature: Number(workspace?.openAiTemp ?? 0.7),
        },
        messages,
      },
      "sendChat"
    );
  }

  /**
   * Starts a streaming workspace chat.
   * @returns {Promise<{type: "ollamaStream", response: object}>} The raw fetch response, tagged for the stream handler.
   * @throws {Error} On network failure or non-2xx status.
   */
  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    const messages = await this.compressMessages(
      {
        systemPrompt: chatPrompt(workspace),
        userPrompt: prompt,
        chatHistory,
      },
      rawHistory
    );
    return await ollamaChatStream(
      this.basePath,
      {
        model: this.model,
        stream: true,
        options: {
          temperature: Number(workspace?.openAiTemp ?? 0.7),
        },
        messages,
      },
      "streamChat"
    );
  }

  /**
   * Sends a pre-built message array (non-streaming) and returns the reply text.
   * @param {Array|null} messages
   * @param {{temperature?: number}} [options]
   * @returns {Promise<string>}
   * @throws {Error} On network failure, non-2xx status, or an empty reply.
   */
  async getChatCompletion(messages = null, { temperature = 0.7 } = {}) {
    return await ollamaChatText(
      this.basePath,
      {
        model: this.model,
        messages,
        stream: false,
        options: {
          temperature,
        },
      },
      "getChatCompletion"
    );
  }

  /**
   * Sends a pre-built message array and returns the streaming response.
   * @param {Array|null} messages
   * @param {{temperature?: number}} [options]
   * @returns {Promise<{type: "ollamaStream", response: object}>}
   * @throws {Error} On network failure or non-2xx status.
   */
  async streamGetChatCompletion(messages = null, { temperature = 0.7 } = {}) {
    return await ollamaChatStream(
      this.basePath,
      {
        model: this.model,
        stream: true,
        messages,
        options: {
          temperature,
        },
      },
      "streamGetChatCompletion"
    );
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }

  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  /**
   * Builds the prompt from `promptArgs` and passes it through the shared
   * message compressor together with the raw history.
   */
  async compressMessages(promptArgs = {}, rawHistory = []) {
    // Required lazily, as in the original implementation.
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
 }

 /**
  * POSTs `payload` as JSON to `${basePath}/api/chat`.
  * @returns {Promise<object>} The fetch response (status already verified).
  * @throws {Error} With "<status> <statusText>" as message on a non-2xx reply;
  *   network errors from fetch propagate unchanged.
  */
 async function postOllamaChat(basePath, payload) {
  const res = await fetch(`${basePath}/api/chat`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  if (!res.ok) throw new Error(`${res.status} ${res.statusText}`);
  return res;
 }

 /**
  * Runs a non-streaming chat request and extracts `message.content`.
  * `caller` is the public method name, used only to label error messages.
  */
 async function ollamaChatText(basePath, payload, caller) {
  let textResponse;
  try {
    const res = await postOllamaChat(basePath, payload);
    const data = await res.json();
    textResponse = data?.message?.content;
  } catch (e) {
    console.error(e);
    throw new Error(`Ollama::${caller} failed with: ${e.message}`, {
      cause: e,
    });
  }
  // A reply without message.content must surface as a clear error, not a TypeError.
  if (!textResponse?.length)
    throw new Error(`Ollama::${caller} text response was empty.`);
  return textResponse;
 }

 /**
  * Runs a streaming chat request and tags the raw response for the stream
  * handler. `caller` is the public method name, used only to label errors.
  */
 async function ollamaChatStream(basePath, payload, caller) {
  try {
    const response = await postOllamaChat(basePath, payload);
    return { type: "ollamaStream", response };
  } catch (e) {
    console.error(e);
    throw new Error(`Ollama:${caller} ${e.message}`, { cause: e });
  }
 }
 module.exports = {
  OllamaAILLM,
 };
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@ -199,6 +199,7 @@ async function streamEmptyEmbeddingChat({
  return;
 }
 // TODO: Refactor this implementation
 function handleStreamResponses(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;
@ -231,6 +232,34 @@ function handleStreamResponses(response, stream, responseProps) {
    });
  }
  if (stream?.type === "ollamaStream") {
    return new Promise(async (resolve) => {
      let fullText = "";
      for await (const dataChunk of stream.response.body) {
        const chunk = JSON.parse(Buffer.from(dataChunk).toString());
        fullText += chunk.message.content;
        writeResponseChunk(response, {
          uuid,
          sources: [],
          type: "textResponseChunk",
          textResponse: chunk.message.content,
          close: false,
          error: false,
        });
      }
      writeResponseChunk(response, {
        uuid,
        sources,
        type: "textResponseChunk",
        textResponse: "",
        close: true,
        error: false,
      });
      resolve(fullText);
    });
  }
  // If stream is not a regular OpenAI Stream (like if using native model)
  // we can just iterate the stream content instead.
  if (!stream.hasOwnProperty("data")) {
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@ -1,4 +1,4 @@
-const SUPPORT_CUSTOM_MODELS = ["openai", "localai", "native-llm"];
+const SUPPORT_CUSTOM_MODELS = ["openai", "localai", "ollama", "native-llm"];
 async function getCustomModels(provider = "", apiKey = null, basePath = null) {
  if (!SUPPORT_CUSTOM_MODELS.includes(provider))
@ -9,6 +9,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
      return await openAiModels(apiKey);
    case "localai":
      return await localAIModels(basePath, apiKey);
    case "ollama":
      return await ollamaAIModels(basePath, apiKey);
    case "native-llm":
      return nativeLLMModels();
    default:
@ -59,6 +61,37 @@ async function localAIModels(basePath = null, apiKey = null) {
  return { models, error: null };
 }
 /**
  * Lists the models known to an Ollama server by querying `${basePath}/api/tags`.
  *
  * @param {string|null} basePath - Server URL; must parse as a URL and must not end in "/".
  * @param {string|null} _apiKey - Unused.
  * @returns {Promise<{models: Array<{id: string}>, error: string|null}>}
  *   `error` is only set when `basePath` is rejected; request failures are
  *   logged and reported as an empty model list with `error: null`.
  */
 async function ollamaAIModels(basePath = null, _apiKey = null) {
  let usable;
  try {
    new URL(basePath); // throws for anything that does not parse as a URL
    usable = !basePath.endsWith("/");
  } catch {
    usable = false;
  }
  if (!usable) return { models: [], error: "Not a valid URL." };

  const request = fetch(`${basePath}/api/tags`);
  let found;
  try {
    const res = await request;
    if (!res.ok)
      throw new Error(`Could not reach Ollama server! ${res.status}`);
    const data = await res.json();
    const listed = data?.models || [];
    found = listed.map((entry) => ({ id: entry.name }));
  } catch (e) {
    console.error(e);
    found = [];
  }
  return { models: found, error: null };
 }
 function nativeLLMModels() {
  const fs = require("fs");
  const path = require("path");
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@ -43,6 +43,9 @@ function getLLMProvider() {
    case "localai":
      const { LocalAiLLM } = require("../AiProviders/localAi");
      return new LocalAiLLM(embedder);
    case "ollama":
      const { OllamaAILLM } = require("../AiProviders/ollama");
      return new OllamaAILLM(embedder);
    case "native":
      const { NativeLLM } = require("../AiProviders/native");
      return new NativeLLM(embedder);
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@ -81,6 +81,19 @@ const KEY_MAPPING = {
    checks: [],
  },
  OllamaLLMBasePath: {
    envKey: "OLLAMA_BASE_PATH",
    checks: [isNotEmpty, validOllamaLLMBasePath],
  },
  OllamaLLMModelPref: {
    envKey: "OLLAMA_MODEL_PREF",
    checks: [],
  },
  OllamaLLMTokenLimit: {
    envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
    checks: [nonZero],
  },
  // Native LLM Settings
  NativeLLMModelPref: {
    envKey: "NATIVE_LLM_MODEL_PREF",
@ -208,6 +221,17 @@ function validLLMExternalBasePath(input = "") {
  }
 }
 /**
  * Validation check for the Ollama base URL setting.
  *
  * @param {string} input - Candidate URL.
  * @returns {string|null} `null` when the value is acceptable, otherwise a
  *   message explaining why it was rejected.
  */
 function validOllamaLLMBasePath(input = "") {
  try {
    new URL(input); // throws when the value does not parse as a URL
    const hasTrailingSlash = input.endsWith("/");
    return hasTrailingSlash ? "URL cannot end with a slash" : null;
  } catch {
    return "Not a valid URL";
  }
 }
 function supportedLLM(input = "") {
  return [
    "openai",
@ -216,6 +240,7 @@ function supportedLLM(input = "") {
    "gemini",
    "lmstudio",
    "localai",
    "ollama",
    "native",
  ].includes(input);
 }