mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-19 20:50:09 +01:00)
Support LocalAi as LLM provider by @tlandenberger (#373)
* feature: add LocalAI as llm provider
* update Onboarding/mgmt settings
  Grab models from models endpoint for localai
  merge with master
* update streaming for complete chunk streaming
  update localAI LLM to be able to stream
* force schema on URL

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
Co-authored-by: tlandenberger <tobiaslandenberger@gmail.com>
This commit is contained in:
parent 6957bc3ec0
commit 4bb99ab4bf
@@ -23,6 +23,11 @@ CACHE_VECTORS="true"
 # LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
 # LMSTUDIO_MODEL_TOKEN_LIMIT=4096

+# LLM_PROVIDER='localai'
+# LOCAL_AI_BASE_PATH='http://host.docker.internal:8080/v1'
+# LOCAL_AI_MODEL_PREF='luna-ai-llama2'
+# LOCAL_AI_MODEL_TOKEN_LIMIT=4096
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
frontend/src/components/LLMSelection/LocalAiOptions/index.jsx (new file, 140 lines)
@@ -0,0 +1,140 @@
import { useEffect, useState } from "react";
import { Info } from "@phosphor-icons/react";
import paths from "../../../utils/paths";
import System from "../../../models/system";

export default function LocalAiOptions({ settings, showAlert = false }) {
  const [basePathValue, setBasePathValue] = useState(settings?.LocalAiBasePath);
  const [basePath, setBasePath] = useState(settings?.LocalAiBasePath);
  function updateBasePath() {
    setBasePath(basePathValue);
  }

  return (
    <div className="w-full flex flex-col">
      {showAlert && (
        <div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-6 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
          <div className="gap-x-2 flex items-center">
            <Info size={12} className="hidden md:visible" />
            <p className="text-sm md:text-base">
              LocalAI as your LLM requires you to set an embedding service to
              use.
            </p>
          </div>
          <a
            href={paths.settings.embeddingPreference()}
            className="text-sm md:text-base my-2 underline"
          >
            Manage embedding →
          </a>
        </div>
      )}
      <div className="w-full flex items-center gap-4">
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            Local AI Base URL
          </label>
          <input
            type="url"
            name="LocalAiBasePath"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="http://localhost:1234/v1"
            defaultValue={settings?.LocalAiBasePath}
            required={true}
            autoComplete="off"
            spellCheck={false}
            onChange={(e) => setBasePathValue(e.target.value)}
            onBlur={updateBasePath}
          />
        </div>
        <LocalAIModelSelection settings={settings} basePath={basePath} />
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            Token context window
          </label>
          <input
            type="number"
            name="LocalAiTokenLimit"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="4096"
            min={1}
            onScroll={(e) => e.target.blur()}
            defaultValue={settings?.LocalAiTokenLimit}
            required={true}
            autoComplete="off"
          />
        </div>
      </div>
    </div>
  );
}

function LocalAIModelSelection({ settings, basePath = null }) {
  const [customModels, setCustomModels] = useState([]);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    async function findCustomModels() {
      if (!basePath || !basePath.includes("/v1")) {
        setCustomModels([]);
        setLoading(false);
        return;
      }
      setLoading(true);
      const { models } = await System.customModels("localai", null, basePath);
      setCustomModels(models || []);
      setLoading(false);
    }
    findCustomModels();
  }, [basePath]);

  if (loading || customModels.length == 0) {
    return (
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Chat Model Selection
        </label>
        <select
          name="LocalAiModelPref"
          disabled={true}
          className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
        >
          <option disabled={true} selected={true}>
            {basePath?.includes("/v1")
              ? "-- loading available models --"
              : "-- waiting for URL --"}
          </option>
        </select>
      </div>
    );
  }

  return (
    <div className="flex flex-col w-60">
      <label className="text-white text-sm font-semibold block mb-4">
        Chat Model Selection
      </label>
      <select
        name="LocalAiModelPref"
        required={true}
        className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
      >
        {customModels.length > 0 && (
          <optgroup label="Your loaded models">
            {customModels.map((model) => {
              return (
                <option
                  key={model.id}
                  value={model.id}
                  selected={settings.LocalAiModelPref === model.id}
                >
                  {model.id}
                </option>
              );
            })}
          </optgroup>
        )}
      </select>
    </div>
  );
}
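A usage sketch for the new panel (this is how the settings and onboarding pages later in this commit mount it; `settings` is whatever system-settings object the page already loaded):

  <LocalAiOptions settings={settings} showAlert={true} />

Note that LocalAIModelSelection only calls System.customModels once the base path contains "/v1", so the model dropdown reads "-- waiting for URL --" until a valid base URL is entered.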
BIN frontend/src/media/llmprovider/localai.png (new file, 174 KiB, binary file not shown)
@@ -319,13 +319,14 @@ const System = {
       return false;
     });
   },
-  customModels: async function (provider, apiKey) {
+  customModels: async function (provider, apiKey = null, basePath = null) {
     return fetch(`${API_BASE}/system/custom-models`, {
       method: "POST",
       headers: baseHeaders(),
       body: JSON.stringify({
         provider,
         apiKey,
+        basePath,
       }),
     })
       .then((res) => {
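A hedged example of the new third argument from the frontend side (this mirrors the call LocalAIModelSelection makes above; the URL is an example value):

  // Example only: LocalAI needs no API key, just the base path of the running server.
  const { models, error } = await System.customModels(
    "localai",
    null,
    "http://localhost:8080/v1"
  );
  // models -> e.g. [{ id: "luna-ai-llama2" }, ...] on success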
@@ -9,12 +9,14 @@ import OpenAiLogo from "../../../media/llmprovider/openai.png";
 import AzureOpenAiLogo from "../../../media/llmprovider/azure.png";
 import AnthropicLogo from "../../../media/llmprovider/anthropic.png";
 import LMStudioLogo from "../../../media/llmprovider/lmstudio.png";
+import LocalAiLogo from "../../../media/llmprovider/localai.png";
 import PreLoader from "../../../components/Preloader";
 import LLMProviderOption from "../../../components/LLMSelection/LLMProviderOption";
 import OpenAiOptions from "../../../components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "../../../components/LLMSelection/AzureAiOptions";
 import AnthropicAiOptions from "../../../components/LLMSelection/AnthropicAiOptions";
 import LMStudioOptions from "../../../components/LLMSelection/LMStudioOptions";
+import LocalAiOptions from "../../../components/LLMSelection/LocalAiOptions";

 export default function GeneralLLMPreference() {
   const [saving, setSaving] = useState(false);

@@ -141,6 +143,15 @@ export default function GeneralLLMPreference() {
             image={LMStudioLogo}
             onClick={updateLLMChoice}
           />
+          <LLMProviderOption
+            name="Local AI"
+            value="localai"
+            link="localai.io"
+            description="Run LLMs locally on your own machine."
+            checked={llmChoice === "localai"}
+            image={LocalAiLogo}
+            onClick={updateLLMChoice}
+          />
         </div>
         <div className="mt-10 flex flex-wrap gap-4 max-w-[800px]">
           {llmChoice === "openai" && (

@@ -155,6 +166,9 @@ export default function GeneralLLMPreference() {
           {llmChoice === "lmstudio" && (
             <LMStudioOptions settings={settings} showAlert={true} />
           )}
+          {llmChoice === "localai" && (
+            <LocalAiOptions settings={settings} showAlert={true} />
+          )}
         </div>
       </div>
     </form>
@@ -4,6 +4,7 @@ import OpenAiLogo from "../../../../../media/llmprovider/openai.png";
 import AzureOpenAiLogo from "../../../../../media/llmprovider/azure.png";
 import AnthropicLogo from "../../../../../media/llmprovider/anthropic.png";
 import LMStudioLogo from "../../../../../media/llmprovider/lmstudio.png";
+import LocalAiLogo from "../../../../../media/llmprovider/localai.png";
 import ChromaLogo from "../../../../../media/vectordbs/chroma.png";
 import PineconeLogo from "../../../../../media/vectordbs/pinecone.png";
 import LanceDbLogo from "../../../../../media/vectordbs/lancedb.png";

@@ -43,6 +44,13 @@ const LLM_SELECTION_PRIVACY = {
     ],
     logo: LMStudioLogo,
   },
+  localai: {
+    name: "LocalAI",
+    description: [
+      "Your model and chats are only accessible on the server running LocalAI",
+    ],
+    logo: LocalAiLogo,
+  },
 };

 const VECTOR_DB_PRIVACY = {
@@ -3,6 +3,7 @@ import OpenAiLogo from "../../../../../media/llmprovider/openai.png";
 import AzureOpenAiLogo from "../../../../../media/llmprovider/azure.png";
 import AnthropicLogo from "../../../../../media/llmprovider/anthropic.png";
 import LMStudioLogo from "../../../../../media/llmprovider/lmstudio.png";
+import LocalAiLogo from "../../../../../media/llmprovider/localai.png";
 import System from "../../../../../models/system";
 import PreLoader from "../../../../../components/Preloader";
 import LLMProviderOption from "../../../../../components/LLMSelection/LLMProviderOption";

@@ -10,6 +11,7 @@ import OpenAiOptions from "../../../../../components/LLMSelection/OpenAiOptions"
 import AzureAiOptions from "../../../../../components/LLMSelection/AzureAiOptions";
 import AnthropicAiOptions from "../../../../../components/LLMSelection/AnthropicAiOptions";
 import LMStudioOptions from "../../../../../components/LLMSelection/LMStudioOptions";
+import LocalAiOptions from "../../../../../components/LLMSelection/LocalAiOptions";

 function LLMSelection({ nextStep, prevStep, currentStep }) {
   const [llmChoice, setLLMChoice] = useState("openai");

@@ -47,8 +49,8 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {

     switch (data.LLMProvider) {
       case "anthropic":
-        return nextStep("embedding_preferences");
       case "lmstudio":
+      case "localai":
         return nextStep("embedding_preferences");
       default:
         return nextStep("vector_database");

@@ -107,6 +109,15 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {
             image={LMStudioLogo}
             onClick={updateLLMChoice}
           />
+          <LLMProviderOption
+            name="Local AI"
+            value="localai"
+            link="localai.io"
+            description="Run LLMs locally on your own machine."
+            checked={llmChoice === "localai"}
+            image={LocalAiLogo}
+            onClick={updateLLMChoice}
+          />
         </div>
         <div className="mt-10 flex flex-wrap gap-4 max-w-[800px]">
           {llmChoice === "openai" && <OpenAiOptions settings={settings} />}

@@ -117,6 +128,7 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {
           {llmChoice === "lmstudio" && (
             <LMStudioOptions settings={settings} />
           )}
+          {llmChoice === "localai" && <LocalAiOptions settings={settings} />}
         </div>
       </div>
       <div className="flex w-full justify-between items-center p-6 space-x-2 border-t rounded-b border-gray-500/50">
@@ -23,6 +23,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
 # LMSTUDIO_MODEL_TOKEN_LIMIT=4096

+# LLM_PROVIDER='localai'
+# LOCAL_AI_BASE_PATH='http://localhost:8080/v1'
+# LOCAL_AI_MODEL_PREF='luna-ai-llama2'
+# LOCAL_AI_MODEL_TOKEN_LIMIT=4096
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
@@ -615,8 +615,12 @@ function systemEndpoints(app) {
     [validatedRequest],
     async (request, response) => {
       try {
-        const { provider, apiKey } = reqBody(request);
-        const { models, error } = await getCustomModels(provider, apiKey);
+        const { provider, apiKey = null, basePath = null } = reqBody(request);
+        const { models, error } = await getCustomModels(
+          provider,
+          apiKey,
+          basePath
+        );
         return response.status(200).json({
           models,
           error,
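For reference, a sketch of the request the updated endpoint now accepts (field values are illustrative; this is the same call System.customModels issues above):

  // POST /system/custom-models
  // body:     { "provider": "localai", "apiKey": null, "basePath": "http://localhost:8080/v1" }
  // response: { "models": [{ "id": "luna-ai-llama2" }], "error": null }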
@@ -94,6 +94,20 @@ const SystemSettings = {
             AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
           }
         : {}),
+
+      ...(llmProvider === "localai"
+        ? {
+            LocalAiBasePath: process.env.LOCAL_AI_BASE_PATH,
+            LocalAiModelPref: process.env.LOCAL_AI_MODEL_PREF,
+            LocalAiTokenLimit: process.env.LOCAL_AI_MODEL_TOKEN_LIMIT,
+
+            // For embedding credentials when localai is selected.
+            OpenAiKey: !!process.env.OPEN_AI_KEY,
+            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
+            AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
+            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
+          }
+        : {}),
     };
   },

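Illustrative shape of the fragment this branch contributes when LLM_PROVIDER='localai' (values assumed from the env example earlier in this commit; API keys are exposed only as booleans via the !! coercion above):

  // {
  //   LocalAiBasePath: "http://localhost:8080/v1",
  //   LocalAiModelPref: "luna-ai-llama2",
  //   LocalAiTokenLimit: "4096",
  //   OpenAiKey: false,        // true only if OPEN_AI_KEY is set for embeddings
  //   AzureOpenAiEndpoint: undefined,
  //   AzureOpenAiKey: false,
  //   AzureOpenAiEmbeddingModelPref: undefined,
  // }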
server/utils/AiProviders/localAi/index.js (new file, 182 lines)
@@ -0,0 +1,182 @@
const { chatPrompt } = require("../../chats");

class LocalAiLLM {
  constructor(embedder = null) {
    if (!process.env.LOCAL_AI_BASE_PATH)
      throw new Error("No LocalAI Base Path was set.");

    const { Configuration, OpenAIApi } = require("openai");
    const config = new Configuration({
      basePath: process.env.LOCAL_AI_BASE_PATH,
    });
    this.openai = new OpenAIApi(config);
    this.model = process.env.LOCAL_AI_MODEL_PREF;
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    if (!embedder)
      throw new Error(
        "INVALID LOCAL AI SETUP. No embedding engine has been set. Go to instance settings and set up an embedding interface to use LocalAI as your LLM."
      );
    this.embedder = embedder;
  }

  streamingEnabled() {
    return "streamChat" in this && "streamGetChatCompletion" in this;
  }

  // Ensure the user set a value for the token limit
  // and if undefined - assume 4096 window.
  promptWindowLimit() {
    const limit = process.env.LOCAL_AI_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No LocalAi token context limit was set.");
    return Number(limit);
  }

  async isValidChatCompletionModel(_ = "") {
    return true;
  }

  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}
Context:
    ${contextTexts
      .map((text, i) => {
        return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
      })
      .join("")}`,
    };
    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
  }

  async isSafe(_input = "") {
    // Not implemented so must be stubbed
    return { safe: true, reasons: [] };
  }

  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `LocalAI chat: ${this.model} is not valid for chat completion!`
      );

    const textResponse = await this.openai
      .createChatCompletion({
        model: this.model,
        temperature: Number(workspace?.openAiTemp ?? 0.7),
        n: 1,
        messages: await this.compressMessages(
          {
            systemPrompt: chatPrompt(workspace),
            userPrompt: prompt,
            chatHistory,
          },
          rawHistory
        ),
      })
      .then((json) => {
        const res = json.data;
        if (!res.hasOwnProperty("choices"))
          throw new Error("LocalAI chat: No results!");
        if (res.choices.length === 0)
          throw new Error("LocalAI chat: No results length!");
        return res.choices[0].message.content;
      })
      .catch((error) => {
        throw new Error(
          `LocalAI::createChatCompletion failed with: ${error.message}`
        );
      });

    return textResponse;
  }

  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `LocalAI chat: ${this.model} is not valid for chat completion!`
      );

    const streamRequest = await this.openai.createChatCompletion(
      {
        model: this.model,
        stream: true,
        temperature: Number(workspace?.openAiTemp ?? 0.7),
        n: 1,
        messages: await this.compressMessages(
          {
            systemPrompt: chatPrompt(workspace),
            userPrompt: prompt,
            chatHistory,
          },
          rawHistory
        ),
      },
      { responseType: "stream" }
    );
    return streamRequest;
  }

  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `LocalAI chat: ${this.model} is not valid for chat completion!`
      );

    const { data } = await this.openai.createChatCompletion({
      model: this.model,
      messages,
      temperature,
    });

    if (!data.hasOwnProperty("choices")) return null;
    return data.choices[0].message.content;
  }

  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `LocalAi chat: ${this.model} is not valid for chat completion!`
      );

    const streamRequest = await this.openai.createChatCompletion(
      {
        model: this.model,
        stream: true,
        messages,
        temperature,
      },
      { responseType: "stream" }
    );
    return streamRequest;
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  LocalAiLLM,
};
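A minimal usage sketch of the new provider class (assumptions: a LocalAI server is running at the base path below with the named model loaded; the module path is relative to the repo root; the stub embedder is illustrative only and not part of this commit):

  process.env.LOCAL_AI_BASE_PATH = "http://localhost:8080/v1";
  process.env.LOCAL_AI_MODEL_PREF = "luna-ai-llama2";
  process.env.LOCAL_AI_MODEL_TOKEN_LIMIT = "4096";

  const { LocalAiLLM } = require("./server/utils/AiProviders/localAi");

  // Any object exposing embedTextInput/embedChunks satisfies the constructor check.
  const stubEmbedder = {
    embedTextInput: async (_text) => [],
    embedChunks: async (chunks = []) => chunks.map(() => []),
  };

  (async () => {
    const llm = new LocalAiLLM(stubEmbedder); // throws without an embedder, as above
    const reply = await llm.getChatCompletion(
      [{ role: "user", content: "Hello from AnythingLLM" }],
      { temperature: 0.7 }
    );
    console.log(reply);
  })();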
@@ -211,12 +211,18 @@ function handleStreamResponses(response, stream, responseProps) {
        .filter((line) => line.trim() !== "");

      for (const line of lines) {
+       let validJSON = false;
        const message = chunk + line.replace(/^data: /, "");

        // JSON chunk is incomplete and has not ended yet
        // so we need to stitch it together. You would think JSON
        // chunks would only come complete - but they don't!
-       if (message.slice(-3) !== "}]}") {
+       try {
+         JSON.parse(message);
+         validJSON = true;
+       } catch {}
+
+       if (!validJSON) {
          chunk += message;
          continue;
        } else {

@@ -234,12 +240,12 @@ function handleStreamResponses(response, stream, responseProps) {
          });
          resolve(fullText);
        } else {
-         let finishReason;
+         let finishReason = null;
          let token = "";
          try {
            const json = JSON.parse(message);
            token = json?.choices?.[0]?.delta?.content;
-           finishReason = json?.choices?.[0]?.finish_reason;
+           finishReason = json?.choices?.[0]?.finish_reason || null;
          } catch {
            continue;
          }
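The replaced suffix check ("}]}") only recognized one specific terminator, while parsing the buffered text treats a chunk as complete exactly when it is valid JSON, regardless of where the payload was split. A standalone sketch of that stitch-until-it-parses idea (names are illustrative, not part of the commit):

  function stitchStreamLines(lines) {
    // Accumulate partial SSE payloads until the buffered text parses as JSON.
    let chunk = "";
    const payloads = [];
    for (const line of lines) {
      const message = chunk + line.replace(/^data: /, "");
      try {
        payloads.push(JSON.parse(message));
        chunk = "";
      } catch {
        chunk = message; // still incomplete - keep buffering
      }
    }
    return payloads;
  }

  // stitchStreamLines(['data: {"choices":[{"delta":{"content":"Hel', 'lo"}}]}'])
  // -> [{ choices: [{ delta: { content: "Hello" } }] }]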
@@ -1,12 +1,14 @@
-const SUPPORT_CUSTOM_MODELS = ["openai"];
+const SUPPORT_CUSTOM_MODELS = ["openai", "localai"];

-async function getCustomModels(provider = "", apiKey = null) {
+async function getCustomModels(provider = "", apiKey = null, basePath = null) {
   if (!SUPPORT_CUSTOM_MODELS.includes(provider))
     return { models: [], error: "Invalid provider for custom models" };

   switch (provider) {
     case "openai":
       return await openAiModels(apiKey);
+    case "localai":
+      return await localAIModels(basePath);
     default:
       return { models: [], error: "Invalid provider for custom models" };
   }

@@ -33,6 +35,23 @@ async function openAiModels(apiKey = null) {
   return { models, error: null };
 }

+async function localAIModels(basePath = null) {
+  const { Configuration, OpenAIApi } = require("openai");
+  const config = new Configuration({
+    basePath,
+  });
+  const openai = new OpenAIApi(config);
+  const models = await openai
+    .listModels()
+    .then((res) => res.data.data)
+    .catch((e) => {
+      console.error(`LocalAI:listModels`, e.message);
+      return [];
+    });
+
+  return { models, error: null };
+}
+
 module.exports = {
   getCustomModels,
 };
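A hedged example of the new LocalAI branch in action (the require path is assumed; the base path must point at a running LocalAI instance):

  const { getCustomModels } = require("./customModels"); // path assumed for illustration

  getCustomModels("localai", null, "http://localhost:8080/v1").then(
    ({ models, error }) => {
      if (error) console.error(error);
      // models is res.data.data from LocalAI's OpenAI-compatible /models endpoint, or [] on failure.
      console.log(models.map((m) => m.id));
    }
  );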
@@ -39,6 +39,10 @@ function getLLMProvider() {
       const { LMStudioLLM } = require("../AiProviders/lmStudio");
       embedder = getEmbeddingEngineSelection();
       return new LMStudioLLM(embedder);
+    case "localai":
+      const { LocalAiLLM } = require("../AiProviders/localAi");
+      embedder = getEmbeddingEngineSelection();
+      return new LocalAiLLM(embedder);
     default:
       throw new Error("ENV: No LLM_PROVIDER value found in environment!");
   }
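With the new case in place, selecting the provider becomes a pure environment switch; a sketch of the effect, assuming the LOCAL_AI_* variables and an embedding engine are configured:

  // With LLM_PROVIDER='localai' in the env, the helper above returns the new class:
  // const llm = getLLMProvider();  // -> LocalAiLLM wired to the selected embedder
  // llm.streamingEnabled();        // -> true, since streamChat/streamGetChatCompletion exist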
@@ -47,13 +47,27 @@ const KEY_MAPPING = {
   // LMStudio Settings
   LMStudioBasePath: {
     envKey: "LMSTUDIO_BASE_PATH",
-    checks: [isNotEmpty, validLMStudioBasePath],
+    checks: [isNotEmpty, validLLMExternalBasePath],
   },
   LMStudioTokenLimit: {
     envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },

+  // LocalAI Settings
+  LocalAiBasePath: {
+    envKey: "LOCAL_AI_BASE_PATH",
+    checks: [isNotEmpty, validLLMExternalBasePath],
+  },
+  LocalAiModelPref: {
+    envKey: "LOCAL_AI_MODEL_PREF",
+    checks: [],
+  },
+  LocalAiTokenLimit: {
+    envKey: "LOCAL_AI_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
+
   EmbeddingEngine: {
     envKey: "EMBEDDING_ENGINE",
     checks: [supportedEmbeddingModel],

@@ -151,7 +165,7 @@ function validAnthropicApiKey(input = "") {
     : "Anthropic Key must start with sk-ant-";
 }

-function validLMStudioBasePath(input = "") {
+function validLLMExternalBasePath(input = "") {
   try {
     new URL(input);
     if (!input.includes("v1")) return "URL must include /v1";

@@ -164,7 +178,9 @@ function validLMStudioBasePath(input = "") {
 }

 function supportedLLM(input = "") {
-  return ["openai", "azure", "anthropic", "lmstudio"].includes(input);
+  return ["openai", "azure", "anthropic", "lmstudio", "localai"].includes(
+    input
+  );
 }

 function validAnthropicModel(input = "") {