Merge branch 'master' into 1214-feat-implement-width-and-height-resizing-options-for-embed-chat-widget

2024-11-14 10:30:10 +01:00 · 2024-05-03 13:46:11 -07:00 · 2024-05-03 13:46:11 -07:00 · ab63385a59
commit ab63385a59
parent 6c619b5f88 0eb16f2c60
17 changed files with 415 additions and 17 deletions
--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@
 </p>
 <p align="center">
-    <b>AnythingLLM: The all-in-one AI app you were looking for.<br />
+    <b>AnythingLLM:</b> The all-in-one AI app you were looking for.<br />
    Chat with your docs, use AI Agents, hyper-configurable, multi-user, & no fustrating set up required.
 </p>
--- a/collector/utils/WhisperProviders/OpenAiWhisper.js
+++ b/collector/utils/WhisperProviders/OpenAiWhisper.js
@ -22,7 +22,6 @@ class OpenAiWhisper {
      .create({
        file: fs.createReadStream(fullFilePath),
        model: this.model,
        model: "whisper-1",
        response_format: "text",
        temperature: this.temperature,
      })
--- a/collector/utils/extensions/Confluence/index.js
+++ b/collector/utils/extensions/Confluence/index.js
@ -66,11 +66,17 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
  const outFolder = slugify(
    `${subdomain}-confluence-${v4().slice(0, 4)}`
  ).toLowerCase();
-  const outFolderPath = path.resolve(
+
  const outFolderPath =
    process.env.NODE_ENV === "development"
      ? path.resolve(
          __dirname,
          `../../../../server/storage/documents/${outFolder}`
-  );
+        )
-  fs.mkdirSync(outFolderPath);
+      : path.resolve(process.env.STORAGE_DIR, `documents/${outFolder}`);
  if (!fs.existsSync(outFolderPath))
    fs.mkdirSync(outFolderPath, { recursive: true });
  docs.forEach((doc) => {
    const data = {
--- a/collector/utils/extensions/GithubRepo/index.js
+++ b/collector/utils/extensions/GithubRepo/index.js
@ -31,11 +31,17 @@ async function loadGithubRepo(args) {
  const outFolder = slugify(
    `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}`
  ).toLowerCase();
-  const outFolderPath = path.resolve(
+
  const outFolderPath =
    process.env.NODE_ENV === "development"
      ? path.resolve(
          __dirname,
          `../../../../server/storage/documents/${outFolder}`
-  );
+        )
-  fs.mkdirSync(outFolderPath);
+      : path.resolve(process.env.STORAGE_DIR, `documents/${outFolder}`);
  if (!fs.existsSync(outFolderPath))
    fs.mkdirSync(outFolderPath, { recursive: true });
  for (const doc of docs) {
    if (!doc.pageContent) continue;
--- a/collector/utils/extensions/YoutubeTranscript/index.js
+++ b/collector/utils/extensions/YoutubeTranscript/index.js
@ -67,11 +67,17 @@ async function loadYouTubeTranscript({ url }) {
  const outFolder = slugify(
    `${metadata.author} YouTube transcripts`
  ).toLowerCase();
-  const outFolderPath = path.resolve(
+
  const outFolderPath =
    process.env.NODE_ENV === "development"
      ? path.resolve(
          __dirname,
          `../../../../server/storage/documents/${outFolder}`
-  );
+        )
-  if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath);
+      : path.resolve(process.env.STORAGE_DIR, `documents/${outFolder}`);
  if (!fs.existsSync(outFolderPath))
    fs.mkdirSync(outFolderPath, { recursive: true });
  const data = {
    id: v4(),
--- a/docker/.env.example
+++ b/docker/.env.example
@ -66,6 +66,11 @@ GID='1000'
 # GROQ_API_KEY=gsk_abcxyz
 # GROQ_MODEL_PREF=llama3-8b-8192
 # LLM_PROVIDER='koboldcpp'
 # KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
 # KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
 # KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
 # LLM_PROVIDER='generic-openai'
 # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'
 # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo'
--- a/frontend/src/components/LLMSelection/KoboldCPPOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/KoboldCPPOptions/index.jsx
@ -0,0 +1,112 @@
 import { useState, useEffect } from "react";
 import System from "@/models/system";
 export default function KoboldCPPOptions({ settings }) {
  const [basePathValue, setBasePathValue] = useState(
    settings?.KoboldCPPBasePath
  );
  const [basePath, setBasePath] = useState(settings?.KoboldCPPBasePath);
  return (
    <div className="flex gap-4 flex-wrap">
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Base URL
        </label>
        <input
          type="url"
          name="KoboldCPPBasePath"
          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
          placeholder="http://127.0.0.1:5000/v1"
          defaultValue={settings?.KoboldCPPBasePath}
          required={true}
          autoComplete="off"
          spellCheck={false}
          onChange={(e) => setBasePathValue(e.target.value)}
          onBlur={() => setBasePath(basePathValue)}
        />
      </div>
      <KoboldCPPModelSelection settings={settings} basePath={basePath} />
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Token context window
        </label>
        <input
          type="number"
          name="KoboldCPPTokenLimit"
          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
          placeholder="4096"
          min={1}
          onScroll={(e) => e.target.blur()}
          defaultValue={settings?.KoboldCPPTokenLimit}
          required={true}
          autoComplete="off"
        />
      </div>
    </div>
  );
 }
 function KoboldCPPModelSelection({ settings, basePath = null }) {
  const [customModels, setCustomModels] = useState([]);
  const [loading, setLoading] = useState(true);
  useEffect(() => {
    async function findCustomModels() {
      if (!basePath || !basePath.includes("/v1")) {
        setCustomModels([]);
        setLoading(false);
        return;
      }
      setLoading(true);
      const { models } = await System.customModels("koboldcpp", null, basePath);
      setCustomModels(models || []);
      setLoading(false);
    }
    findCustomModels();
  }, [basePath]);
  if (loading || customModels.length === 0) {
    return (
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Chat Model Selection
        </label>
        <select
          name="KoboldCPPModelPref"
          disabled={true}
          className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
        >
          <option disabled={true} selected={true}>
            {basePath?.includes("/v1")
              ? "-- loading available models --"
              : "-- waiting for URL --"}
          </option>
        </select>
      </div>
    );
  }
  return (
    <div className="flex flex-col w-60">
      <label className="text-white text-sm font-semibold block mb-4">
        Chat Model Selection
      </label>
      <select
        name="KoboldCPPModelPref"
        required={true}
        className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
      >
        {customModels.map((model) => (
          <option
            key={model.id}
            value={model.id}
            selected={settings?.KoboldCPPModelPref === model.id}
          >
            {model.id}
          </option>
        ))}
      </select>
    </div>
  );
 }
--- a/frontend/src/media/llmprovider/koboldcpp.png
+++ b/frontend/src/media/llmprovider/koboldcpp.png
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@ -18,6 +18,7 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import PerplexityLogo from "@/media/llmprovider/perplexity.png";
 import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
 import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@ -40,6 +41,7 @@ import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
 import CTAButton from "@/components/lib/CTAButton";
 import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
 export const AVAILABLE_LLM_PROVIDERS = [
  {
@ -154,6 +156,18 @@ export const AVAILABLE_LLM_PROVIDERS = [
      "The fastest LLM inferencing available for real-time AI applications.",
    requiredConfig: ["GroqApiKey"],
  },
  {
    name: "KoboldCPP",
    value: "koboldcpp",
    logo: KoboldCPPLogo,
    options: (settings) => <KoboldCPPOptions settings={settings} />,
    description: "Run local LLMs using koboldcpp.",
    requiredConfig: [
      "KoboldCPPModelPref",
      "KoboldCPPBasePath",
      "KoboldCPPTokenLimit",
    ],
  },
  {
    name: "Cohere",
    value: "cohere",
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@ -15,6 +15,7 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import PerplexityLogo from "@/media/llmprovider/perplexity.png";
 import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
 import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
 import AstraDBLogo from "@/media/vectordbs/astraDB.png";
@ -138,6 +139,13 @@ export const LLM_SELECTION_PRIVACY = {
    ],
    logo: GroqLogo,
  },
  koboldcpp: {
    name: "KoboldCPP",
    description: [
      "Your model and chats are only accessible on the server running KoboldCPP",
    ],
    logo: KoboldCPPLogo,
  },
  "generic-openai": {
    name: "Generic OpenAI compatible service",
    description: [
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@ -15,6 +15,7 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import PerplexityLogo from "@/media/llmprovider/perplexity.png";
 import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
 import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@ -38,6 +39,7 @@ import System from "@/models/system";
 import paths from "@/utils/paths";
 import showToast from "@/utils/toast";
 import { useNavigate } from "react-router-dom";
 import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
 const TITLE = "LLM Preference";
 const DESCRIPTION =
@ -102,6 +104,13 @@ const LLMS = [
    options: (settings) => <LocalAiOptions settings={settings} />,
    description: "Run LLMs locally on your own machine.",
  },
  {
    name: "KoboldCPP",
    value: "koboldcpp",
    logo: KoboldCPPLogo,
    options: (settings) => <KoboldCPPOptions settings={settings} />,
    description: "Run local LLMs using koboldcpp.",
  },
  {
    name: "Together AI",
    value: "togetherai",
--- a/server/.env.example
+++ b/server/.env.example
@ -63,6 +63,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # GROQ_API_KEY=gsk_abcxyz
 # GROQ_MODEL_PREF=llama3-8b-8192
 # LLM_PROVIDER='koboldcpp'
 # KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
 # KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
 # KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
 # LLM_PROVIDER='generic-openai'
 # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'
 # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo'
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@ -359,6 +359,11 @@ const SystemSettings = {
      HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY,
      HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT,
      // KoboldCPP Keys
      KoboldCPPModelPref: process.env.KOBOLD_CPP_MODEL_PREF,
      KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH,
      KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT,
      // Generic OpenAI Keys
      GenericOpenAiBasePath: process.env.GENERIC_OPEN_AI_BASE_PATH,
      GenericOpenAiModelPref: process.env.GENERIC_OPEN_AI_MODEL_PREF,
--- a/server/utils/AiProviders/koboldCPP/index.js
+++ b/server/utils/AiProviders/koboldCPP/index.js
@ -0,0 +1,180 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
  clientAbortedHandler,
  writeResponseChunk,
 } = require("../../helpers/chat/responses");
 const { v4: uuidv4 } = require("uuid");
 class KoboldCPPLLM {
  constructor(embedder = null, modelPreference = null) {
    const { OpenAI: OpenAIApi } = require("openai");
    if (!process.env.KOBOLD_CPP_BASE_PATH)
      throw new Error(
        "KoboldCPP must have a valid base path to use for the api."
      );
    this.basePath = process.env.KOBOLD_CPP_BASE_PATH;
    this.openai = new OpenAIApi({
      baseURL: this.basePath,
      apiKey: null,
    });
    this.model = modelPreference ?? process.env.KOBOLD_CPP_MODEL_PREF ?? null;
    if (!this.model) throw new Error("KoboldCPP must have a valid model set.");
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };
    if (!embedder)
      console.warn(
        "No embedding provider defined for KoboldCPPLLM - falling back to NativeEmbedder for embedding!"
      );
    this.embedder = !embedder ? new NativeEmbedder() : embedder;
    this.defaultTemp = 0.7;
    this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
  }
  log(text, ...args) {
    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
  }
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }
  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }
  // Ensure the user set a value for the token limit
  // and if undefined - assume 4096 window.
  promptWindowLimit() {
    const limit = process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No token context limit was set.");
    return Number(limit);
  }
  // Short circuit since we have no idea if the model is valid or not
  // in pre-flight for generic endpoints
  isValidChatCompletionModel(_modelName = "") {
    return true;
  }
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
  }
  async isSafe(_input = "") {
    // Not implemented so must be stubbed
    return { safe: true, reasons: [] };
  }
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    const result = await this.openai.chat.completions
      .create({
        model: this.model,
        messages,
        temperature,
      })
      .catch((e) => {
        throw new Error(e.response.data.error.message);
      });
    if (!result.hasOwnProperty("choices") || result.choices.length === 0)
      return null;
    return result.choices[0].message.content;
  }
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    const streamRequest = await this.openai.chat.completions.create({
      model: this.model,
      stream: true,
      messages,
      temperature,
    });
    return streamRequest;
  }
  handleStream(response, stream, responseProps) {
    const { uuid = uuidv4(), sources = [] } = responseProps;
    // Custom handler for KoboldCPP stream responses
    return new Promise(async (resolve) => {
      let fullText = "";
      const handleAbort = () => clientAbortedHandler(resolve, fullText);
      response.on("close", handleAbort);
      for await (const chunk of stream) {
        const message = chunk?.choices?.[0];
        const token = message?.delta?.content;
        if (token) {
          fullText += token;
          writeResponseChunk(response, {
            uuid,
            sources: [],
            type: "textResponseChunk",
            textResponse: token,
            close: false,
            error: false,
          });
        }
        // KoboldCPP finishes with "length" or "stop"
        if (
          message.finish_reason !== "null" &&
          (message.finish_reason === "length" ||
            message.finish_reason === "stop")
        ) {
          writeResponseChunk(response, {
            uuid,
            sources,
            type: "textResponseChunk",
            textResponse: "",
            close: true,
            error: false,
          });
          response.removeListener("close", handleAbort);
          resolve(fullText);
        }
      }
    });
  }
  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }
  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
 }
 module.exports = {
  KoboldCPPLLM,
 };
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@ -14,6 +14,7 @@ const SUPPORT_CUSTOM_MODELS = [
  "perplexity",
  "openrouter",
  "lmstudio",
  "koboldcpp",
 ];
 async function getCustomModels(provider = "", apiKey = null, basePath = null) {
@ -39,6 +40,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
      return await getOpenRouterModels();
    case "lmstudio":
      return await getLMStudioModels(basePath);
    case "koboldcpp":
      return await getKoboldCPPModels(basePath);
    default:
      return { models: [], error: "Invalid provider for custom models" };
  }
@ -171,6 +174,28 @@ async function getLMStudioModels(basePath = null) {
  }
 }
 async function getKoboldCPPModels(basePath = null) {
  try {
    const { OpenAI: OpenAIApi } = require("openai");
    const openai = new OpenAIApi({
      baseURL: basePath || process.env.LMSTUDIO_BASE_PATH,
      apiKey: null,
    });
    const models = await openai.models
      .list()
      .then((results) => results.data)
      .catch((e) => {
        console.error(`KoboldCPP:listModels`, e.message);
        return [];
      });
    return { models, error: null };
  } catch (e) {
    console.error(`KoboldCPP:getKoboldCPPModels`, e.message);
    return { models: [], error: "Could not fetch KoboldCPP Models" };
  }
 }
 async function ollamaAIModels(basePath = null) {
  let url;
  try {
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@ -77,6 +77,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
    case "groq":
      const { GroqLLM } = require("../AiProviders/groq");
      return new GroqLLM(embedder, model);
    case "koboldcpp":
      const { KoboldCPPLLM } = require("../AiProviders/koboldCPP");
      return new KoboldCPPLLM(embedder, model);
    case "cohere":
      const { CohereLLM } = require("../AiProviders/cohere");
      return new CohereLLM(embedder, model);
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@ -132,6 +132,20 @@ const KEY_MAPPING = {
    checks: [nonZero],
  },
  // KoboldCPP Settings
  KoboldCPPBasePath: {
    envKey: "KOBOLD_CPP_BASE_PATH",
    checks: [isNotEmpty, isValidURL],
  },
  KoboldCPPModelPref: {
    envKey: "KOBOLD_CPP_MODEL_PREF",
    checks: [isNotEmpty],
  },
  KoboldCPPTokenLimit: {
    envKey: "KOBOLD_CPP_MODEL_TOKEN_LIMIT",
    checks: [nonZero],
  },
  // Generic OpenAI InferenceSettings
  GenericOpenAiBasePath: {
    envKey: "GENERIC_OPEN_AI_BASE_PATH",
@ -403,6 +417,7 @@ function supportedLLM(input = "") {
    "perplexity",
    "openrouter",
    "groq",
    "koboldcpp",
    "cohere",
    "generic-openai",
  ].includes(input);