merge with master

timothycarambat 2024-05-03 10:02:53 -07:00
commit c60077a078
58 changed files with 1037 additions and 1172 deletions

View File

@ -5,7 +5,7 @@
</p> </p>
<p align="center"> <p align="center">
<b>AnythingLLM: The all-in-one AI app you were looking for.<br /> <b>AnythingLLM:</b> The all-in-one AI app you were looking for.<br />
Chat with your docs, use AI Agents, hyper-configurable, multi-user, & no frustrating setup required. Chat with your docs, use AI Agents, hyper-configurable, multi-user, & no frustrating setup required.
</p> </p>

View File

@ -22,7 +22,6 @@ class OpenAiWhisper {
.create({ .create({
file: fs.createReadStream(fullFilePath), file: fs.createReadStream(fullFilePath),
model: this.model, model: this.model,
model: "whisper-1",
response_format: "text", response_format: "text",
temperature: this.temperature, temperature: this.temperature,
}) })

View File

@ -66,11 +66,8 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
const outFolder = slugify( const outFolder = slugify(
`${subdomain}-confluence-${v4().slice(0, 4)}` `${subdomain}-confluence-${v4().slice(0, 4)}`
).toLowerCase(); ).toLowerCase();
const outFolderPath = path.resolve( const outFolderPath = path.resolve(documentsFolder, outFolder);
__dirname, if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true });
`../../../../server/storage/documents/${outFolder}`
);
fs.mkdirSync(outFolderPath);
docs.forEach((doc) => { docs.forEach((doc) => {
const data = { const data = {

View File

@ -32,7 +32,7 @@ async function loadGithubRepo(args) {
`${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}` `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}`
).toLowerCase(); ).toLowerCase();
const outFolderPath = path.resolve(documentsFolder, outFolder); const outFolderPath = path.resolve(documentsFolder, outFolder);
fs.mkdirSync(outFolderPath); if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true });
for (const doc of docs) { for (const doc of docs) {
if (!doc.pageContent) continue; if (!doc.pageContent) continue;

View File

@ -9,9 +9,8 @@ const { YoutubeLoader } = require("./YoutubeLoader");
function validYoutubeVideoUrl(link) { function validYoutubeVideoUrl(link) {
const UrlPattern = require("url-pattern"); const UrlPattern = require("url-pattern");
const opts = new URL(link); const opts = new URL(link);
const url = `${opts.protocol}//${opts.host}${opts.pathname}${ const url = `${opts.protocol}//${opts.host}${opts.pathname}${opts.searchParams.has("v") ? `?v=${opts.searchParams.get("v")}` : ""
opts.searchParams.has("v") ? `?v=${opts.searchParams.get("v")}` : "" }`;
}`;
const shortPatternMatch = new UrlPattern( const shortPatternMatch = new UrlPattern(
"https\\://(www.)youtu.be/(:videoId)" "https\\://(www.)youtu.be/(:videoId)"
@ -68,7 +67,7 @@ async function loadYouTubeTranscript({ url }) {
`${metadata.author} YouTube transcripts` `${metadata.author} YouTube transcripts`
).toLowerCase(); ).toLowerCase();
const outFolderPath = path.resolve(documentsFolder, outFolder); const outFolderPath = path.resolve(documentsFolder, outFolder);
if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath); if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true });
const data = { const data = {
id: v4(), id: v4(),
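
The three collector changes above (Confluence, GitHub, YouTube) converge on the same pattern: resolve the output directory from a shared documentsFolder constant instead of a brittle __dirname walk, and create it only when it does not already exist. A minimal sketch of that pattern, assuming documentsFolder points at server/storage/documents the way the old relative path did:

const fs = require("fs");
const path = require("path");

// Assumption: documentsFolder resolves to server/storage/documents, matching the
// hard-coded relative path the old Confluence loader used.
const documentsFolder = path.resolve(__dirname, "../../../../server/storage/documents");

function ensureOutputFolder(outFolder) {
  const outFolderPath = path.resolve(documentsFolder, outFolder);
  // Existence check + recursive create makes repeated collector runs idempotent
  // instead of throwing EEXIST like a bare fs.mkdirSync call could.
  if (!fs.existsSync(outFolderPath))
    fs.mkdirSync(outFolderPath, { recursive: true });
  return outFolderPath;
}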

View File

@ -66,12 +66,21 @@ GID='1000'
# GROQ_API_KEY=gsk_abcxyz # GROQ_API_KEY=gsk_abcxyz
# GROQ_MODEL_PREF=llama3-8b-8192 # GROQ_MODEL_PREF=llama3-8b-8192
# LLM_PROVIDER='koboldcpp'
# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
# LLM_PROVIDER='generic-openai' # LLM_PROVIDER='generic-openai'
# GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1' # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'
# GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo' # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo'
# GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096 # GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096
# GENERIC_OPEN_AI_API_KEY=sk-123abc # GENERIC_OPEN_AI_API_KEY=sk-123abc
# LLM_PROVIDER='cohere'
# COHERE_API_KEY=
# COHERE_MODEL_PREF='command-r'
########################################### ###########################################
######## Embedding API SELECTION ########## ######## Embedding API SELECTION ##########
########################################### ###########################################
@ -100,6 +109,10 @@ GID='1000'
# EMBEDDING_MODEL_PREF='nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_0.gguf' # EMBEDDING_MODEL_PREF='nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_0.gguf'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192 # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192
# EMBEDDING_ENGINE='cohere'
# COHERE_API_KEY=
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
########################################### ###########################################
######## Vector Database Selection ######## ######## Vector Database Selection ########
########################################### ###########################################
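
The docker .env example now documents two additional providers. A quick sanity-check sketch (illustrative, not part of the commit) showing which of the new keys each provider needs, using the same variable names as the block above; the /v1 requirement mirrors the check the KoboldCPP settings UI performs later in this commit:

require("dotenv").config();

if (process.env.LLM_PROVIDER === "koboldcpp") {
  const ok =
    !!process.env.KOBOLD_CPP_BASE_PATH?.includes("/v1") &&
    !!process.env.KOBOLD_CPP_MODEL_PREF &&
    !Number.isNaN(Number(process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT));
  console.log(`koboldcpp config ${ok ? "looks complete" : "is missing values"}`);
}

if (process.env.LLM_PROVIDER === "cohere") {
  const ok = !!process.env.COHERE_API_KEY && !!process.env.COHERE_MODEL_PREF;
  console.log(`cohere config ${ok ? "looks complete" : "is missing values"}`);
}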

View File

@ -4,7 +4,7 @@ import { staticHljs as hljs } from "./hljs";
import { v4 } from "uuid"; import { v4 } from "uuid";
const markdown = markdownIt({ const markdown = markdownIt({
html: true, html: false,
typographer: true, typographer: true,
highlight: function (code, lang) { highlight: function (code, lang) {
const uuid = v4(); const uuid = v4();
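
Flipping html to false in both markdown renderers (this one and the copy further down) means raw HTML inside a chat response is escaped rather than injected into the page. A small illustrative sketch, not taken from the codebase:

const markdownIt = require("markdown-it");
const md = markdownIt({ html: false });

// Raw tags are emitted as escaped text; markdown formatting still renders.
console.log(md.render('<img src=x onerror="alert(1)"> **hello**'));
// roughly: <p>&lt;img src=x onerror=&quot;alert(1)&quot;&gt; <strong>hello</strong></p>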

View File

@ -0,0 +1,55 @@
export default function CohereEmbeddingOptions({ settings }) {
return (
<div className="w-full flex flex-col gap-y-4">
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="CohereApiKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Cohere API Key"
defaultValue={settings?.CohereApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Preference
</label>
<select
name="EmbeddingModelPref"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<optgroup label="Available embedding models">
{[
"embed-english-v3.0",
"embed-multilingual-v3.0",
"embed-english-light-v3.0",
"embed-multilingual-light-v3.0",
"embed-english-v2.0",
"embed-english-light-v2.0",
"embed-multilingual-v2.0",
].map((model) => {
return (
<option
key={model}
value={model}
selected={settings?.EmbeddingModelPref === model}
>
{model}
</option>
);
})}
</optgroup>
</select>
</div>
</div>
</div>
);
}

View File

@ -0,0 +1,49 @@
export default function CohereAiOptions({ settings }) {
return (
<div className="w-full flex flex-col">
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Cohere API Key
</label>
<input
type="password"
name="CohereApiKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Cohere API Key"
defaultValue={settings?.CohereApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="CohereModelPref"
defaultValue={settings?.CohereModelPref || "command-r"}
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{[
"command-r",
"command-r-plus",
"command",
"command-light",
"command-nightly",
"command-light-nightly",
].map((model) => {
return (
<option key={model} value={model}>
{model}
</option>
);
})}
</select>
</div>
</div>
</div>
);
}

View File

@ -0,0 +1,112 @@
import { useState, useEffect } from "react";
import System from "@/models/system";
export default function KoboldCPPOptions({ settings }) {
const [basePathValue, setBasePathValue] = useState(
settings?.KoboldCPPBasePath
);
const [basePath, setBasePath] = useState(settings?.KoboldCPPBasePath);
return (
<div className="flex gap-4 flex-wrap">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Base URL
</label>
<input
type="url"
name="KoboldCPPBasePath"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="http://127.0.0.1:5000/v1"
defaultValue={settings?.KoboldCPPBasePath}
required={true}
autoComplete="off"
spellCheck={false}
onChange={(e) => setBasePathValue(e.target.value)}
onBlur={() => setBasePath(basePathValue)}
/>
</div>
<KoboldCPPModelSelection settings={settings} basePath={basePath} />
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Token context window
</label>
<input
type="number"
name="KoboldCPPTokenLimit"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="4096"
min={1}
onScroll={(e) => e.target.blur()}
defaultValue={settings?.KoboldCPPTokenLimit}
required={true}
autoComplete="off"
/>
</div>
</div>
);
}
function KoboldCPPModelSelection({ settings, basePath = null }) {
const [customModels, setCustomModels] = useState([]);
const [loading, setLoading] = useState(true);
useEffect(() => {
async function findCustomModels() {
if (!basePath || !basePath.includes("/v1")) {
setCustomModels([]);
setLoading(false);
return;
}
setLoading(true);
const { models } = await System.customModels("koboldcpp", null, basePath);
setCustomModels(models || []);
setLoading(false);
}
findCustomModels();
}, [basePath]);
if (loading || customModels.length === 0) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="KoboldCPPModelPref"
disabled={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
{basePath?.includes("/v1")
? "-- loading available models --"
: "-- waiting for URL --"}
</option>
</select>
</div>
);
}
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="KoboldCPPModelPref"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{customModels.map((model) => (
<option
key={model.id}
value={model.id}
selected={settings?.KoboldCPPModelPref === model.id}
>
{model.id}
</option>
))}
</select>
</div>
);
}

View File

@ -150,9 +150,13 @@ export default function ActiveWorkspaces() {
<Link <Link
type="button" type="button"
to={paths.workspace.settings.generalAppearance( to={
workspace.slug isInWorkspaceSettings
)} ? paths.workspace.chat(workspace.slug)
: paths.workspace.settings.generalAppearance(
workspace.slug
)
}
onMouseEnter={() => handleGearMouseEnter(workspace.id)} onMouseEnter={() => handleGearMouseEnter(workspace.id)}
onMouseLeave={() => handleGearMouseLeave(workspace.id)} onMouseLeave={() => handleGearMouseLeave(workspace.id)}
className="rounded-md flex items-center justify-center text-[#A7A8A9] hover:text-white ml-auto" className="rounded-md flex items-center justify-center text-[#A7A8A9] hover:text-white ml-auto"

View File

@ -107,7 +107,7 @@ export function Chartable({ props, workspace }) {
); );
case "line": case "line":
return ( return (
<div className="bg-zinc-900 p-8 pb-12 rounded-xl text-white h-[500px]"> <div className="bg-zinc-900 p-8 pb-12 rounded-xl text-white h-[500px] w-full">
<h3 className="text-lg font-medium">{title}</h3> <h3 className="text-lg font-medium">{title}</h3>
<LineChart <LineChart
className="h-[400px]" className="h-[400px]"
@ -371,7 +371,7 @@ export function Chartable({ props, workspace }) {
<div className="py-2 px-4 w-full flex gap-x-5 md:max-w-[800px] flex-col"> <div className="py-2 px-4 w-full flex gap-x-5 md:max-w-[800px] flex-col">
<div className="flex gap-x-5"> <div className="flex gap-x-5">
<WorkspaceProfileImage workspace={workspace} /> <WorkspaceProfileImage workspace={workspace} />
<div className="relative"> <div className="relative w-full">
<DownloadGraph onClick={handleDownload} /> <DownloadGraph onClick={handleDownload} />
<div ref={ref}>{renderChart()}</div> <div ref={ref}>{renderChart()}</div>
<span <span
@ -390,7 +390,7 @@ export function Chartable({ props, workspace }) {
return ( return (
<div className="flex justify-center items-end w-full"> <div className="flex justify-center items-end w-full">
<div className="py-2 px-4 w-full flex gap-x-5 md:max-w-[800px] flex-col"> <div className="py-2 px-4 w-full flex gap-x-5 md:max-w-[800px] flex-col">
<div className="relative"> <div className="relative w-full">
<DownloadGraph onClick={handleDownload} /> <DownloadGraph onClick={handleDownload} />
<div ref={ref}>{renderChart()}</div> <div ref={ref}>{renderChart()}</div>
</div> </div>

View File

@ -26,6 +26,14 @@ const PROVIDER_DEFAULT_MODELS = {
"gemma-7b-it", "gemma-7b-it",
], ],
native: [], native: [],
cohere: [
"command-r",
"command-r-plus",
"command",
"command-light",
"command-nightly",
"command-light-nightly",
],
}; };
// For togetherAi, which has a large model list - we subgroup the options // For togetherAi, which has a large model list - we subgroup the options

Binary file not shown. (new image, 139 KiB)

Binary file not shown. (new image, 6.9 KiB)

View File

@ -9,6 +9,22 @@ import showToast from "@/utils/toast";
import CTAButton from "@/components/lib/CTAButton"; import CTAButton from "@/components/lib/CTAButton";
export default function AdminLogs() { export default function AdminLogs() {
const query = useQuery();
const [loading, setLoading] = useState(true);
const [logs, setLogs] = useState([]);
const [offset, setOffset] = useState(Number(query.get("offset") || 0));
const [canNext, setCanNext] = useState(false);
useEffect(() => {
async function fetchLogs() {
const { logs: _logs, hasPages = false } = await System.eventLogs(offset);
setLogs(_logs);
setCanNext(hasPages);
setLoading(false);
}
fetchLogs();
}, [offset]);
const handleResetLogs = async () => { const handleResetLogs = async () => {
if ( if (
!window.confirm( !window.confirm(
@ -19,13 +35,22 @@ export default function AdminLogs() {
const { success, error } = await System.clearEventLogs(); const { success, error } = await System.clearEventLogs();
if (success) { if (success) {
showToast("Event logs cleared successfully.", "success"); showToast("Event logs cleared successfully.", "success");
setTimeout(() => { setLogs([]);
window.location.reload(); setCanNext(false);
}, 1000); setOffset(0);
} else { } else {
showToast(`Failed to clear logs: ${error}`, "error"); showToast(`Failed to clear logs: ${error}`, "error");
} }
}; };
const handlePrevious = () => {
setOffset(Math.max(offset - 1, 0));
};
const handleNext = () => {
setOffset(offset + 1);
};
return ( return (
<div className="w-screen h-screen overflow-hidden bg-sidebar flex"> <div className="w-screen h-screen overflow-hidden bg-sidebar flex">
<Sidebar /> <Sidebar />
@ -53,37 +78,28 @@ export default function AdminLogs() {
Clear Event Logs Clear Event Logs
</CTAButton> </CTAButton>
</div> </div>
<LogsContainer /> <LogsContainer
loading={loading}
logs={logs}
offset={offset}
canNext={canNext}
handleNext={handleNext}
handlePrevious={handlePrevious}
/>
</div> </div>
</div> </div>
</div> </div>
); );
} }
function LogsContainer() { function LogsContainer({
const query = useQuery(); loading,
const [loading, setLoading] = useState(true); logs,
const [logs, setLogs] = useState([]); offset,
const [offset, setOffset] = useState(Number(query.get("offset") || 0)); canNext,
const [canNext, setCanNext] = useState(false); handleNext,
handlePrevious,
const handlePrevious = () => { }) {
setOffset(Math.max(offset - 1, 0));
};
const handleNext = () => {
setOffset(offset + 1);
};
useEffect(() => {
async function fetchLogs() {
const { logs: _logs, hasPages = false } = await System.eventLogs(offset);
setLogs(_logs);
setCanNext(hasPages);
setLoading(false);
}
fetchLogs();
}, [offset]);
if (loading) { if (loading) {
return ( return (
<Skeleton.default <Skeleton.default

View File

@ -9,6 +9,7 @@ import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
import LocalAiLogo from "@/media/llmprovider/localai.png"; import LocalAiLogo from "@/media/llmprovider/localai.png";
import OllamaLogo from "@/media/llmprovider/ollama.png"; import OllamaLogo from "@/media/llmprovider/ollama.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import PreLoader from "@/components/Preloader"; import PreLoader from "@/components/Preloader";
import ChangeWarningModal from "@/components/ChangeWarning"; import ChangeWarningModal from "@/components/ChangeWarning";
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions"; import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
@ -17,6 +18,8 @@ import LocalAiOptions from "@/components/EmbeddingSelection/LocalAiOptions";
import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbeddingOptions"; import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbeddingOptions";
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions"; import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions"; import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions";
import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions";
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem"; import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
import { useModal } from "@/hooks/useModal"; import { useModal } from "@/hooks/useModal";
@ -68,6 +71,13 @@ const EMBEDDERS = [
description: description:
"Discover, download, and run thousands of cutting edge LLMs in a few clicks.", "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
}, },
{
name: "Cohere",
value: "cohere",
logo: CohereLogo,
options: (settings) => <CohereEmbeddingOptions settings={settings} />,
description: "Run powerful embedding models from Cohere.",
},
]; ];
export default function GeneralEmbeddingPreference() { export default function GeneralEmbeddingPreference() {

View File

@ -17,6 +17,8 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import GroqLogo from "@/media/llmprovider/groq.png"; import GroqLogo from "@/media/llmprovider/groq.png";
import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import PreLoader from "@/components/Preloader"; import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@ -32,10 +34,12 @@ import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions";
import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions";
import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions";
import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions";
import LLMItem from "@/components/LLMSelection/LLMItem"; import LLMItem from "@/components/LLMSelection/LLMItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
import CTAButton from "@/components/lib/CTAButton"; import CTAButton from "@/components/lib/CTAButton";
import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
export const AVAILABLE_LLM_PROVIDERS = [ export const AVAILABLE_LLM_PROVIDERS = [
{ {
@ -150,6 +154,26 @@ export const AVAILABLE_LLM_PROVIDERS = [
"The fastest LLM inferencing available for real-time AI applications.", "The fastest LLM inferencing available for real-time AI applications.",
requiredConfig: ["GroqApiKey"], requiredConfig: ["GroqApiKey"],
}, },
{
name: "KoboldCPP",
value: "koboldcpp",
logo: KoboldCPPLogo,
options: (settings) => <KoboldCPPOptions settings={settings} />,
description: "Run local LLMs using koboldcpp.",
requiredConfig: [
"KoboldCPPModelPref",
"KoboldCPPBasePath",
"KoboldCPPTokenLimit",
],
},
{
name: "Cohere",
value: "cohere",
logo: CohereLogo,
options: (settings) => <CohereAiOptions settings={settings} />,
description: "Run Cohere's powerful Command models.",
requiredConfig: ["CohereApiKey"],
},
{ {
name: "Generic OpenAI", name: "Generic OpenAI",
value: "generic-openai", value: "generic-openai",

View File

@ -1,140 +0,0 @@
import useLogo from "@/hooks/useLogo";
import System from "@/models/system";
import showToast from "@/utils/toast";
import { Plus } from "@phosphor-icons/react";
import React, { useState, useEffect } from "react";
import AnythingLLM from "@/media/logo/anything-llm.png";
import paths from "@/utils/paths";
import { useNavigate } from "react-router-dom";
const TITLE = "Custom Logo";
const DESCRIPTION =
"Upload your custom logo to make your chatbot yours. Optional.";
export default function CustomLogo({ setHeader, setForwardBtn, setBackBtn }) {
const navigate = useNavigate();
function handleForward() {
navigate(paths.onboarding.userSetup());
}
function handleBack() {
navigate(paths.onboarding.llmPreference());
}
useEffect(() => {
setHeader({ title: TITLE, description: DESCRIPTION });
setForwardBtn({ showing: true, disabled: false, onClick: handleForward });
setBackBtn({ showing: true, disabled: false, onClick: handleBack });
}, []);
const { logo: _initLogo, setLogo: _setLogo } = useLogo();
const [logo, setLogo] = useState("");
const [isDefaultLogo, setIsDefaultLogo] = useState(true);
useEffect(() => {
async function logoInit() {
setLogo(_initLogo || "");
const _isDefaultLogo = await System.isDefaultLogo();
setIsDefaultLogo(_isDefaultLogo);
}
logoInit();
}, [_initLogo]);
const handleFileUpload = async (event) => {
const file = event.target.files[0];
if (!file) return false;
const objectURL = URL.createObjectURL(file);
setLogo(objectURL);
const formData = new FormData();
formData.append("logo", file);
const { success, error } = await System.uploadLogo(formData);
if (!success) {
showToast(`Failed to upload logo: ${error}`, "error");
setLogo(_initLogo);
return;
}
const logoURL = await System.fetchLogo();
_setLogo(logoURL);
setIsDefaultLogo(false);
};
const handleRemoveLogo = async () => {
setLogo("");
setIsDefaultLogo(true);
const { success, error } = await System.removeCustomLogo();
if (!success) {
console.error("Failed to remove logo:", error);
showToast(`Failed to remove logo: ${error}`, "error");
const logoURL = await System.fetchLogo();
setLogo(logoURL);
setIsDefaultLogo(false);
return;
}
const logoURL = await System.fetchLogo();
_setLogo(logoURL);
};
return (
<div className="flex items-center w-full">
<div className="flex gap-x-8 flex-col w-full">
{isDefaultLogo ? (
<label className="mt-5 hover:opacity-60 w-full flex justify-center transition-all duration-300">
<input
id="logo-upload"
type="file"
accept="image/*"
className="hidden"
onChange={handleFileUpload}
/>
<div
className="max-w-[600px] w-full h-64 max-h-[600px] py-4 bg-zinc-900/50 rounded-2xl border-2 border-dashed border-white border-opacity-60 justify-center items-center inline-flex cursor-pointer"
htmlFor="logo-upload"
>
<div className="flex flex-col items-center justify-center">
<div className="rounded-full bg-white/40">
<Plus className="w-6 h-6 text-black/80 m-2" />
</div>
<div className="text-white text-opacity-80 text-sm font-semibold py-1">
Add a custom logo
</div>
<div className="text-white text-opacity-60 text-xs font-medium py-1">
Recommended size: 800 x 200
</div>
</div>
</div>
</label>
) : (
<div className="w-full flex justify-center">
<img
src={logo}
alt="Uploaded Logo"
className="w-48 h-48 object-contain mr-6"
hidden={isDefaultLogo}
onError={(e) => (e.target.src = AnythingLLM)}
/>
</div>
)}
{!isDefaultLogo ? (
<button
onClick={handleRemoveLogo}
className="text-white text-base font-medium hover:text-opacity-60 mt-8"
>
Remove logo
</button>
) : (
<button
onClick={handleForward}
className="text-white text-base font-medium hover:text-opacity-60 mt-8"
>
Skip
</button>
)}
</div>
</div>
);
}

View File

@ -15,6 +15,8 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import GroqLogo from "@/media/llmprovider/groq.png"; import GroqLogo from "@/media/llmprovider/groq.png";
import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import ZillizLogo from "@/media/vectordbs/zilliz.png"; import ZillizLogo from "@/media/vectordbs/zilliz.png";
import AstraDBLogo from "@/media/vectordbs/astraDB.png"; import AstraDBLogo from "@/media/vectordbs/astraDB.png";
import ChromaLogo from "@/media/vectordbs/chroma.png"; import ChromaLogo from "@/media/vectordbs/chroma.png";
@ -137,6 +139,13 @@ export const LLM_SELECTION_PRIVACY = {
], ],
logo: GroqLogo, logo: GroqLogo,
}, },
koboldcpp: {
name: "KoboldCPP",
description: [
"Your model and chats are only accessible on the server running KoboldCPP",
],
logo: KoboldCPPLogo,
},
"generic-openai": { "generic-openai": {
name: "Generic OpenAI compatible service", name: "Generic OpenAI compatible service",
description: [ description: [
@ -144,6 +153,13 @@ export const LLM_SELECTION_PRIVACY = {
], ],
logo: GenericOpenAiLogo, logo: GenericOpenAiLogo,
}, },
cohere: {
name: "Cohere",
description: [
"Data is shared according to the terms of service of cohere.com and your localities privacy laws.",
],
logo: CohereLogo,
},
}; };
export const VECTOR_DB_PRIVACY = { export const VECTOR_DB_PRIVACY = {
@ -252,6 +268,13 @@ export const EMBEDDING_ENGINE_PRIVACY = {
], ],
logo: LMStudioLogo, logo: LMStudioLogo,
}, },
cohere: {
name: "Cohere",
description: [
"Data is shared according to the terms of service of cohere.com and your localities privacy laws.",
],
logo: CohereLogo,
},
}; };
export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) { export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {

View File

@ -14,6 +14,8 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import GroqLogo from "@/media/llmprovider/groq.png"; import GroqLogo from "@/media/llmprovider/groq.png";
import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@ -28,11 +30,14 @@ import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions";
import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions";
import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions";
import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions";
import LLMItem from "@/components/LLMSelection/LLMItem"; import LLMItem from "@/components/LLMSelection/LLMItem";
import System from "@/models/system"; import System from "@/models/system";
import paths from "@/utils/paths"; import paths from "@/utils/paths";
import showToast from "@/utils/toast"; import showToast from "@/utils/toast";
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
const TITLE = "LLM Preference"; const TITLE = "LLM Preference";
const DESCRIPTION = const DESCRIPTION =
@ -97,6 +102,13 @@ const LLMS = [
options: (settings) => <LocalAiOptions settings={settings} />, options: (settings) => <LocalAiOptions settings={settings} />,
description: "Run LLMs locally on your own machine.", description: "Run LLMs locally on your own machine.",
}, },
{
name: "KoboldCPP",
value: "koboldcpp",
logo: KoboldCPPLogo,
options: (settings) => <KoboldCPPOptions settings={settings} />,
description: "Run local LLMs using koboldcpp.",
},
{ {
name: "Together AI", name: "Together AI",
value: "togetherai", value: "togetherai",
@ -134,6 +146,13 @@ const LLMS = [
description: description:
"The fastest LLM inferencing available for real-time AI applications.", "The fastest LLM inferencing available for real-time AI applications.",
}, },
{
name: "Cohere",
value: "cohere",
logo: CohereLogo,
options: (settings) => <CohereAiOptions settings={settings} />,
description: "Run Cohere's powerful Command models.",
},
{ {
name: "Generic OpenAI", name: "Generic OpenAI",
value: "generic-openai", value: "generic-openai",
@ -200,7 +219,7 @@ export default function LLMPreference({
showToast(`Failed to save LLM settings: ${error}`, "error"); showToast(`Failed to save LLM settings: ${error}`, "error");
return; return;
} }
navigate(paths.onboarding.customLogo()); navigate(paths.onboarding.userSetup());
}; };
useEffect(() => { useEffect(() => {

View File

@ -29,7 +29,7 @@ export default function UserSetup({ setHeader, setForwardBtn, setBackBtn }) {
} }
function handleBack() { function handleBack() {
navigate(paths.onboarding.customLogo()); navigate(paths.onboarding.llmPreference());
} }
useEffect(() => { useEffect(() => {

View File

@ -3,7 +3,6 @@ import { useState } from "react";
import { isMobile } from "react-device-detect"; import { isMobile } from "react-device-detect";
import Home from "./Home"; import Home from "./Home";
import LLMPreference from "./LLMPreference"; import LLMPreference from "./LLMPreference";
import CustomLogo from "./CustomLogo";
import UserSetup from "./UserSetup"; import UserSetup from "./UserSetup";
import DataHandling from "./DataHandling"; import DataHandling from "./DataHandling";
import Survey from "./Survey"; import Survey from "./Survey";
@ -12,7 +11,6 @@ import CreateWorkspace from "./CreateWorkspace";
const OnboardingSteps = { const OnboardingSteps = {
home: Home, home: Home,
"llm-preference": LLMPreference, "llm-preference": LLMPreference,
"custom-logo": CustomLogo,
"user-setup": UserSetup, "user-setup": UserSetup,
"data-handling": DataHandling, "data-handling": DataHandling,
survey: Survey, survey: Survey,

View File

@ -36,7 +36,6 @@ export default function GeneralInfo({ slug }) {
); );
if (!!updatedWorkspace) { if (!!updatedWorkspace) {
showToast("Workspace updated!", "success", { clear: true }); showToast("Workspace updated!", "success", { clear: true });
setTimeout(() => window.location.reload(), 1_500);
} else { } else {
showToast(`Error: ${message}`, "error", { clear: true }); showToast(`Error: ${message}`, "error", { clear: true });
} }

View File

@ -17,6 +17,7 @@ export default function handleChat(
error, error,
close, close,
chatId = null, chatId = null,
action = null,
} = chatResult; } = chatResult;
if (type === "abort" || type === "statusResponse") { if (type === "abort" || type === "statusResponse") {
@ -132,6 +133,12 @@ export default function handleChat(
setChatHistory([..._chatHistory]); setChatHistory([..._chatHistory]);
setLoadingResponse(false); setLoadingResponse(false);
} }
// Action Handling via special 'action' attribute on response.
if (action === "reset_chat") {
// Chat was reset, keep reset message and clear everything else.
setChatHistory([_chatHistory.pop()]);
}
} }
export function chatPrompt(workspace) { export function chatPrompt(workspace) {
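
The new action attribute rides along on the normal chat result object. A hedged example of a payload that would trigger the reset branch above; only the action field and the destructured names visible in this hunk come from the diff, the rest are assumptions based on how handleChat is used, and the values are illustrative:

const chatResult = {
  uuid: "00000000-0000-0000-0000-000000000000", // illustrative
  type: "textResponse",
  textResponse: "Workspace chat memory was reset!",
  sources: [],
  close: true,
  error: false,
  chatId: 42,
  action: "reset_chat", // handleChat keeps only this message and clears the rest
};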

View File

@ -5,7 +5,7 @@ import "highlight.js/styles/github-dark-dimmed.min.css";
import { v4 } from "uuid"; import { v4 } from "uuid";
const markdown = markdownIt({ const markdown = markdownIt({
html: true, html: false,
typographer: true, typographer: true,
highlight: function (code, lang) { highlight: function (code, lang) {
const uuid = v4(); const uuid = v4();

View File

@ -23,9 +23,6 @@ export default {
vectorDatabase: () => { vectorDatabase: () => {
return "/onboarding/vector-database"; return "/onboarding/vector-database";
}, },
customLogo: () => {
return "/onboarding/custom-logo";
},
userSetup: () => { userSetup: () => {
return "/onboarding/user-setup"; return "/onboarding/user-setup";
}, },

View File

@ -63,12 +63,21 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# GROQ_API_KEY=gsk_abcxyz # GROQ_API_KEY=gsk_abcxyz
# GROQ_MODEL_PREF=llama3-8b-8192 # GROQ_MODEL_PREF=llama3-8b-8192
# LLM_PROVIDER='koboldcpp'
# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
# LLM_PROVIDER='generic-openai' # LLM_PROVIDER='generic-openai'
# GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1' # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'
# GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo' # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo'
# GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096 # GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096
# GENERIC_OPEN_AI_API_KEY=sk-123abc # GENERIC_OPEN_AI_API_KEY=sk-123abc
# LLM_PROVIDER='cohere'
# COHERE_API_KEY=
# COHERE_MODEL_PREF='command-r'
########################################### ###########################################
######## Embedding API SELECTION ########## ######## Embedding API SELECTION ##########
########################################### ###########################################
@ -97,6 +106,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# EMBEDDING_MODEL_PREF='nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_0.gguf' # EMBEDDING_MODEL_PREF='nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_0.gguf'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192 # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192
# EMBEDDING_ENGINE='cohere'
# COHERE_API_KEY=
# EMBEDDING_MODEL_PREF='embed-english-v3.0'
########################################### ###########################################
######## Vector Database Selection ######## ######## Vector Database Selection ########
########################################### ###########################################

View File

@ -918,7 +918,7 @@ function systemEndpoints(app) {
[validatedRequest, flexUserRoleValid([ROLES.admin])], [validatedRequest, flexUserRoleValid([ROLES.admin])],
async (request, response) => { async (request, response) => {
try { try {
const { offset = 0, limit = 20 } = reqBody(request); const { offset = 0, limit = 10 } = reqBody(request);
const logs = await EventLogs.whereWithData({}, limit, offset * limit, { const logs = await EventLogs.whereWithData({}, limit, offset * limit, {
id: "desc", id: "desc",
}); });
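
With the default page size lowered to 10, the pagination math is unchanged: the frontend sends a zero-based page offset (the AdminLogs changes above call System.eventLogs(offset)) and the server skips offset * limit rows, newest first. A tiny illustrative sketch of that mapping, with invented values:

const limit = 10;
const offset = 2;            // third page requested by the UI
const skip = offset * limit; // 20 -> rows 21-30 when ordered by id descending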

View File

@ -363,11 +363,20 @@ const SystemSettings = {
HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY, HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY,
HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT, HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT,
// KoboldCPP Keys
KoboldCPPModelPref: process.env.KOBOLD_CPP_MODEL_PREF,
KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH,
KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT,
// Generic OpenAI Keys // Generic OpenAI Keys
GenericOpenAiBasePath: process.env.GENERIC_OPEN_AI_BASE_PATH, GenericOpenAiBasePath: process.env.GENERIC_OPEN_AI_BASE_PATH,
GenericOpenAiModelPref: process.env.GENERIC_OPEN_AI_MODEL_PREF, GenericOpenAiModelPref: process.env.GENERIC_OPEN_AI_MODEL_PREF,
GenericOpenAiTokenLimit: process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT, GenericOpenAiTokenLimit: process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT,
GenericOpenAiKey: !!process.env.GENERIC_OPEN_AI_API_KEY, GenericOpenAiKey: !!process.env.GENERIC_OPEN_AI_API_KEY,
// Cohere API Keys
CohereApiKey: !!process.env.COHERE_API_KEY,
CohereModelPref: process.env.COHERE_MODEL_PREF,
}; };
}, },
}; };

View File

@ -41,6 +41,7 @@
"chalk": "^4", "chalk": "^4",
"check-disk-space": "^3.4.0", "check-disk-space": "^3.4.0",
"chromadb": "^1.5.2", "chromadb": "^1.5.2",
"cohere-ai": "^7.9.5",
"cors": "^2.8.5", "cors": "^2.8.5",
"dotenv": "^16.0.3", "dotenv": "^16.0.3",
"express": "^4.18.2", "express": "^4.18.2",

View File

@ -1,5 +1,4 @@
const { v4 } = require("uuid"); const { v4 } = require("uuid");
const { chatPrompt } = require("../../chats");
const { const {
writeResponseChunk, writeResponseChunk,
clientAbortedHandler, clientAbortedHandler,
@ -33,7 +32,7 @@ class AnthropicLLM {
} }
streamingEnabled() { streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this; return "streamGetChatCompletion" in this;
} }
promptWindowLimit() { promptWindowLimit() {
@ -110,31 +109,6 @@ class AnthropicLLM {
} }
} }
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.isValidChatCompletionModel(this.model))
throw new Error(
`Anthropic chat: ${this.model} is not valid for chat completion!`
);
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const streamRequest = await this.anthropic.messages.stream({
model: this.model,
max_tokens: 4096,
system: messages[0].content, // Strip out the system message
messages: messages.slice(1), // Pop off the system message
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
});
return streamRequest;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.isValidChatCompletionModel(this.model)) if (!this.isValidChatCompletionModel(this.model))
throw new Error( throw new Error(
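
This commit strips the legacy sendChat/streamChat paths from every provider and narrows streamingEnabled() to advertise only streamGetChatCompletion. A hedged sketch of the caller side this implies (the dispatcher itself is not part of this diff, so names here are assumptions), showing why the extra "streamChat" capability check was redundant:

// Assumed shape of the chat dispatcher; only the provider method signatures
// (compressMessages, getChatCompletion, streamGetChatCompletion, handleStream)
// are taken from the classes in this commit.
async function runChat(llm, { systemPrompt, userPrompt, chatHistory, rawHistory, response }) {
  const messages = await llm.compressMessages(
    { systemPrompt, userPrompt, chatHistory },
    rawHistory
  );
  if (llm.streamingEnabled()) {
    const stream = await llm.streamGetChatCompletion(messages, { temperature: 0.7 });
    return await llm.handleStream(response, stream, { sources: [] });
  }
  return await llm.getChatCompletion(messages, { temperature: 0.7 });
}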

View File

@ -1,5 +1,4 @@
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi"); const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
const { chatPrompt } = require("../../chats");
const { const {
writeResponseChunk, writeResponseChunk,
clientAbortedHandler, clientAbortedHandler,
@ -45,7 +44,7 @@ class AzureOpenAiLLM {
} }
streamingEnabled() { streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this; return "streamGetChatCompletion" in this;
} }
// Sure the user selected a proper value for the token limit // Sure the user selected a proper value for the token limit
@ -82,66 +81,6 @@ class AzureOpenAiLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.model)
throw new Error(
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
);
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const textResponse = await this.openai
.getChatCompletions(this.model, messages, {
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
})
.then((res) => {
if (!res.hasOwnProperty("choices"))
throw new Error("AzureOpenAI chat: No results!");
if (res.choices.length === 0)
throw new Error("AzureOpenAI chat: No results length!");
return res.choices[0].message.content;
})
.catch((error) => {
console.log(error);
throw new Error(
`AzureOpenAI::getChatCompletions failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.model)
throw new Error(
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
);
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const stream = await this.openai.streamChatCompletions(
this.model,
messages,
{
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
}
);
return stream;
}
async getChatCompletion(messages = [], { temperature = 0.7 }) { async getChatCompletion(messages = [], { temperature = 0.7 }) {
if (!this.model) if (!this.model)
throw new Error( throw new Error(

View File

@ -0,0 +1,226 @@
const { v4 } = require("uuid");
const { writeResponseChunk } = require("../../helpers/chat/responses");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
class CohereLLM {
constructor(embedder = null) {
const { CohereClient } = require("cohere-ai");
if (!process.env.COHERE_API_KEY)
throw new Error("No Cohere API key was set.");
const cohere = new CohereClient({
token: process.env.COHERE_API_KEY,
});
this.cohere = cohere;
this.model = process.env.COHERE_MODEL_PREF;
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
this.embedder = !!embedder ? embedder : new NativeEmbedder();
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
#convertChatHistoryCohere(chatHistory = []) {
let cohereHistory = [];
chatHistory.forEach((message) => {
switch (message.role) {
case "system":
cohereHistory.push({ role: "SYSTEM", message: message.content });
break;
case "user":
cohereHistory.push({ role: "USER", message: message.content });
break;
case "assistant":
cohereHistory.push({ role: "CHATBOT", message: message.content });
break;
}
});
return cohereHistory;
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
promptWindowLimit() {
switch (this.model) {
case "command-r":
return 128_000;
case "command-r-plus":
return 128_000;
case "command":
return 4_096;
case "command-light":
return 4_096;
case "command-nightly":
return 8_192;
case "command-light-nightly":
return 8_192;
default:
return 4_096;
}
}
async isValidChatCompletionModel(model = "") {
const validModels = [
"command-r",
"command-r-plus",
"command",
"command-light",
"command-nightly",
"command-light-nightly",
];
return validModels.includes(model);
}
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
}) {
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
}
async isSafe(_input = "") {
// Not implemented so must be stubbed
return { safe: true, reasons: [] };
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Cohere chat: ${this.model} is not valid for chat completion!`
);
const message = messages[messages.length - 1].content; // Get the last message
const cohereHistory = this.#convertChatHistoryCohere(messages.slice(0, -1)); // Remove the last message and convert to Cohere
const chat = await this.cohere.chat({
model: this.model,
message: message,
chatHistory: cohereHistory,
temperature,
});
if (!chat.hasOwnProperty("text")) return null;
return chat.text;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Cohere chat: ${this.model} is not valid for chat completion!`
);
const message = messages[messages.length - 1].content; // Get the last message
const cohereHistory = this.#convertChatHistoryCohere(messages.slice(0, -1)); // Remove the last message and convert to Cohere
const stream = await this.cohere.chatStream({
model: this.model,
message: message,
chatHistory: cohereHistory,
temperature,
});
return { type: "stream", stream: stream };
}
async handleStream(response, stream, responseProps) {
return new Promise(async (resolve) => {
let fullText = "";
const { uuid = v4(), sources = [] } = responseProps;
const handleAbort = () => {
writeResponseChunk(response, {
uuid,
sources,
type: "abort",
textResponse: fullText,
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
};
response.on("close", handleAbort);
try {
for await (const chat of stream.stream) {
if (chat.eventType === "text-generation") {
const text = chat.text;
fullText += text;
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: text,
close: false,
error: false,
});
}
}
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} catch (error) {
writeResponseChunk(response, {
uuid,
sources,
type: "abort",
textResponse: null,
close: true,
error: error.message,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
});
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
module.exports = {
CohereLLM,
};
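
A hedged usage sketch for the new provider above; the require path is an assumption and the messages are illustrative, but the constructor and method signatures match the class as added in this commit:

process.env.COHERE_API_KEY = "<your-cohere-api-key>"; // normally set via .env
process.env.COHERE_MODEL_PREF = "command-r";

const { CohereLLM } = require("./server/utils/AiProviders/cohere"); // assumed path

(async () => {
  const llm = new CohereLLM(); // falls back to NativeEmbedder when no embedder is passed
  const reply = await llm.getChatCompletion(
    [
      { role: "system", content: "You are a concise assistant." },
      { role: "user", content: "Summarize what AnythingLLM does in one sentence." },
    ],
    { temperature: 0.7 }
  );
  console.log(reply); // the chat.text string, or null if the SDK response had no text
})();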

View File

@ -1,4 +1,3 @@
const { chatPrompt } = require("../../chats");
const { const {
writeResponseChunk, writeResponseChunk,
clientAbortedHandler, clientAbortedHandler,
@ -48,7 +47,7 @@ class GeminiLLM {
} }
streamingEnabled() { streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this; return "streamGetChatCompletion" in this;
} }
promptWindowLimit() { promptWindowLimit() {
@ -118,32 +117,6 @@ class GeminiLLM {
return allMessages; return allMessages;
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.isValidChatCompletionModel(this.model))
throw new Error(
`Gemini chat: ${this.model} is not valid for chat completion!`
);
const compressedHistory = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
chatHistory,
},
rawHistory
);
const chatThread = this.gemini.startChat({
history: this.formatMessages(compressedHistory),
});
const result = await chatThread.sendMessage(prompt);
const response = result.response;
const responseText = response.text();
if (!responseText) throw new Error("Gemini: No response could be parsed.");
return responseText;
}
async getChatCompletion(messages = [], _opts = {}) { async getChatCompletion(messages = [], _opts = {}) {
if (!this.isValidChatCompletionModel(this.model)) if (!this.isValidChatCompletionModel(this.model))
throw new Error( throw new Error(
@ -165,30 +138,6 @@ class GeminiLLM {
return responseText; return responseText;
} }
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.isValidChatCompletionModel(this.model))
throw new Error(
`Gemini chat: ${this.model} is not valid for chat completion!`
);
const compressedHistory = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
chatHistory,
},
rawHistory
);
const chatThread = this.gemini.startChat({
history: this.formatMessages(compressedHistory),
});
const responseStream = await chatThread.sendMessageStream(prompt);
if (!responseStream.stream)
throw new Error("Could not stream response stream from Gemini.");
return responseStream.stream;
}
async streamGetChatCompletion(messages = [], _opts = {}) { async streamGetChatCompletion(messages = [], _opts = {}) {
if (!this.isValidChatCompletionModel(this.model)) if (!this.isValidChatCompletionModel(this.model))
throw new Error( throw new Error(

View File

@ -1,5 +1,4 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -53,7 +52,7 @@ class GenericOpenAiLLM {
} }
streamingEnabled() { streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this; return "streamGetChatCompletion" in this;
} }
// Ensure the user set a value for the token limit // Ensure the user set a value for the token limit
@ -89,55 +88,6 @@ class GenericOpenAiLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("GenericOpenAI chat: No results!");
if (result.choices.length === 0)
throw new Error("GenericOpenAI chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`GenericOpenAI::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
const result = await this.openai.chat.completions const result = await this.openai.chat.completions
.create({ .create({

View File

@ -1,5 +1,4 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -38,7 +37,7 @@ class GroqLLM {
} }
streamingEnabled() { streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this; return "streamGetChatCompletion" in this;
} }
promptWindowLimit() { promptWindowLimit() {
@ -91,65 +90,6 @@ class GroqLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Groq chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("GroqAI chat: No results!");
if (result.choices.length === 0)
throw new Error("GroqAI chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`GroqAI::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`GroqAI:streamChat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(

View File

@ -1,6 +1,5 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi"); const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -48,7 +47,7 @@ class HuggingFaceLLM {
} }
streamingEnabled() { streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this; return "streamGetChatCompletion" in this;
} }
promptWindowLimit() { promptWindowLimit() {
@ -90,55 +89,6 @@ class HuggingFaceLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("HuggingFace chat: No results!");
if (result.choices.length === 0)
throw new Error("HuggingFace chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`HuggingFace::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
const result = await this.openai.createChatCompletion({ const result = await this.openai.createChatCompletion({
model: this.model, model: this.model,

View File

@ -0,0 +1,180 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
clientAbortedHandler,
writeResponseChunk,
} = require("../../helpers/chat/responses");
const { v4: uuidv4 } = require("uuid");
class KoboldCPPLLM {
constructor(embedder = null, modelPreference = null) {
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.KOBOLD_CPP_BASE_PATH)
throw new Error(
"KoboldCPP must have a valid base path to use for the api."
);
this.basePath = process.env.KOBOLD_CPP_BASE_PATH;
this.openai = new OpenAIApi({
baseURL: this.basePath,
apiKey: null,
});
this.model = modelPreference ?? process.env.KOBOLD_CPP_MODEL_PREF ?? null;
if (!this.model) throw new Error("KoboldCPP must have a valid model set.");
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
if (!embedder)
console.warn(
"No embedding provider defined for KoboldCPPLLM - falling back to NativeEmbedder for embedding!"
);
this.embedder = !embedder ? new NativeEmbedder() : embedder;
this.defaultTemp = 0.7;
this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
}
log(text, ...args) {
console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
const limit = process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}
// Short circuit since we have no idea if the model is valid or not
// in pre-flight for generic endpoints
isValidChatCompletionModel(_modelName = "") {
return true;
}
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
}) {
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
}
async isSafe(_input = "") {
// Not implemented so must be stubbed
return { safe: true, reasons: [] };
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
})
.catch((e) => {
throw new Error(e.response.data.error.message);
});
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
// Custom handler for KoboldCPP stream responses
return new Promise(async (resolve) => {
let fullText = "";
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
// KoboldCPP finishes with "length" or "stop"
if (
message.finish_reason !== "null" &&
(message.finish_reason === "length" ||
message.finish_reason === "stop")
) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
});
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
module.exports = {
KoboldCPPLLM,
};
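For reference, a minimal sketch of how the new KoboldCPPLLM provider above can be exercised once KoboldCPP is serving its OpenAI-compatible API. The endpoint URL, model id, and require path are assumptions for illustration, not part of this commit.

// Minimal sketch only — endpoint, model id, and require path are assumed.
process.env.KOBOLD_CPP_BASE_PATH = "http://127.0.0.1:5001/v1"; // assumed local KoboldCPP endpoint
process.env.KOBOLD_CPP_MODEL_PREF = "koboldcpp/any-loaded-model"; // assumed model id
process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT = "4096";

const { KoboldCPPLLM } = require("./koboldCPP"); // hypothetical path to the class above

async function demoKoboldCPP() {
  const llm = new KoboldCPPLLM(); // no embedder passed: warns and falls back to NativeEmbedder
  const messages = llm.constructPrompt({
    systemPrompt: "You are a helpful assistant.",
    contextTexts: ["AnythingLLM now supports KoboldCPP as an LLM provider."],
    chatHistory: [],
    userPrompt: "Which local inference servers can I use?",
  });
  // Non-streaming completion; returns null if the API responds without choices.
  const reply = await llm.getChatCompletion(messages, { temperature: llm.defaultTemp });
  console.log(reply);
}

demoKoboldCPP().catch(console.error);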

View File

@ -1,4 +1,3 @@
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -49,7 +48,7 @@ class LMStudioLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
// Ensure the user set a value for the token limit // Ensure the user set a value for the token limit
@ -85,65 +84,6 @@ class LMStudioLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.model)
throw new Error(
`LMStudio chat: ${this.model} is not valid or defined for chat completion!`
);
const textResponse = await this.lmstudio.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LMStudio chat: No results!");
if (result.choices.length === 0)
throw new Error("LMStudio chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`LMStudio::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!this.model)
throw new Error(
`LMStudio chat: ${this.model} is not valid or defined for chat completion!`
);
const streamRequest = await this.lmstudio.chat.completions.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
stream: true,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model) if (!this.model)
throw new Error( throw new Error(

View File

@ -1,4 +1,3 @@
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -41,7 +40,7 @@ class LocalAiLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
// Ensure the user set a value for the token limit // Ensure the user set a value for the token limit
@ -75,65 +74,6 @@ class LocalAiLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LocalAI chat: No results!");
if (result.choices.length === 0)
throw new Error("LocalAI chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`LocalAI::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(

View File

@ -1,4 +1,3 @@
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -42,7 +41,7 @@ class MistralLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
promptWindowLimit() { promptWindowLimit() {
@ -70,64 +69,6 @@ class MistralLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Mistral chat: No results!");
if (result.choices.length === 0)
throw new Error("Mistral chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`Mistral::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(

View File

@ -1,7 +1,6 @@
const fs = require("fs"); const fs = require("fs");
const path = require("path"); const path = require("path");
const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { const {
writeResponseChunk, writeResponseChunk,
clientAbortedHandler, clientAbortedHandler,
@ -94,7 +93,7 @@ class NativeLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
// Ensure the user set a value for the token limit // Ensure the user set a value for the token limit
@ -123,45 +122,6 @@ class NativeLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
try {
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const model = await this.#llamaClient({
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
});
const response = await model.call(messages);
return response.content;
} catch (error) {
throw new Error(
`NativeLLM::createChatCompletion failed with: ${error.message}`
);
}
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const model = await this.#llamaClient({
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
});
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const responseStream = await model.stream(messages);
return responseStream;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
const model = await this.#llamaClient({ temperature }); const model = await this.#llamaClient({ temperature });
const response = await model.call(messages); const response = await model.call(messages);

View File

@ -1,4 +1,3 @@
const { chatPrompt } = require("../../chats");
const { StringOutputParser } = require("@langchain/core/output_parsers"); const { StringOutputParser } = require("@langchain/core/output_parsers");
const { const {
writeResponseChunk, writeResponseChunk,
@ -74,7 +73,7 @@ class OllamaAILLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
// Ensure the user set a value for the token limit // Ensure the user set a value for the token limit
@ -108,53 +107,6 @@ class OllamaAILLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const model = this.#ollamaClient({
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
});
const textResponse = await model
.pipe(new StringOutputParser())
.invoke(this.#convertToLangchainPrototypes(messages))
.catch((e) => {
throw new Error(
`Ollama::getChatCompletion failed to communicate with Ollama. ${e.message}`
);
});
if (!textResponse || !textResponse.length)
throw new Error(`Ollama::sendChat text response was empty.`);
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const messages = await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
);
const model = this.#ollamaClient({
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
});
const stream = await model
.pipe(new StringOutputParser())
.stream(this.#convertToLangchainPrototypes(messages));
return stream;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
const model = this.#ollamaClient({ temperature }); const model = this.#ollamaClient({ temperature });
const textResponse = await model const textResponse = await model

View File

@ -1,5 +1,4 @@
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi"); const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -41,7 +40,7 @@ class OpenAiLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
promptWindowLimit() { promptWindowLimit() {
@ -122,65 +121,6 @@ class OpenAiLLM {
return { safe: false, reasons }; return { safe: false, reasons };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("OpenAI chat: No results!");
if (result.choices.length === 0)
throw new Error("OpenAI chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`OpenAI::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(

View File

@ -1,10 +1,8 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { v4: uuidv4 } = require("uuid"); const { v4: uuidv4 } = require("uuid");
const { const {
writeResponseChunk, writeResponseChunk,
clientAbortedHandler, clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
const fs = require("fs"); const fs = require("fs");
const path = require("path"); const path = require("path");
@ -99,7 +97,7 @@ class OpenRouterLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
promptWindowLimit() { promptWindowLimit() {
@ -131,65 +129,6 @@ class OpenRouterLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("OpenRouter chat: No results!");
if (result.choices.length === 0)
throw new Error("OpenRouter chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`OpenRouter::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(
@ -304,143 +243,6 @@ class OpenRouterLLM {
}); });
} }
// handleStream(response, stream, responseProps) {
// const timeoutThresholdMs = 500;
// const { uuid = uuidv4(), sources = [] } = responseProps;
// return new Promise((resolve) => {
// let fullText = "";
// let chunk = "";
// let lastChunkTime = null; // null when first token is still not received.
// // Establish listener to early-abort a streaming response
// // in case things go sideways or the user does not like the response.
// // We preserve the generated text but continue as if chat was completed
// // to preserve previously generated content.
// const handleAbort = () => clientAbortedHandler(resolve, fullText);
// response.on("close", handleAbort);
// // NOTICE: Not all OpenRouter models will return a stop reason
// // which keeps the connection open and so the model never finalizes the stream
// // like the traditional OpenAI response schema does. So in the case the response stream
// // never reaches a formal close state we maintain an interval timer that if we go >=timeoutThresholdMs with
// // no new chunks then we kill the stream and assume it to be complete. OpenRouter is quite fast
// // so this threshold should permit most responses, but we can adjust `timeoutThresholdMs` if
// // we find it is too aggressive.
// const timeoutCheck = setInterval(() => {
// if (lastChunkTime === null) return;
// const now = Number(new Date());
// const diffMs = now - lastChunkTime;
// if (diffMs >= timeoutThresholdMs) {
// console.log(
// `OpenRouter stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
// );
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// }
// }, 500);
// stream.data.on("data", (data) => {
// const lines = data
// ?.toString()
// ?.split("\n")
// .filter((line) => line.trim() !== "");
// for (const line of lines) {
// let validJSON = false;
// const message = chunk + line.replace(/^data: /, "");
// // JSON chunk is incomplete and has not ended yet
// // so we need to stitch it together. You would think JSON
// // chunks would only come complete - but they don't!
// try {
// JSON.parse(message);
// validJSON = true;
// } catch { }
// if (!validJSON) {
// // It can be possible that the chunk decoding is running away
// // and the message chunk fails to append due to string length.
// // In this case abort the chunk and reset so we can continue.
// // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
// try {
// chunk += message;
// } catch (e) {
// console.error(`Chunk appending error`, e);
// chunk = "";
// }
// continue;
// } else {
// chunk = "";
// }
// if (message == "[DONE]") {
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// } else {
// let finishReason = null;
// let token = "";
// try {
// const json = JSON.parse(message);
// token = json?.choices?.[0]?.delta?.content;
// finishReason = json?.choices?.[0]?.finish_reason || null;
// } catch {
// continue;
// }
// if (token) {
// fullText += token;
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources: [],
// type: "textResponseChunk",
// textResponse: token,
// close: false,
// error: false,
// });
// }
// if (finishReason !== null) {
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// }
// }
// }
// });
// });
// }
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) { async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput); return await this.embedder.embedTextInput(textInput);

View File

@ -1,5 +1,4 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -50,7 +49,7 @@ class PerplexityLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
promptWindowLimit() { promptWindowLimit() {
@ -81,65 +80,6 @@ class PerplexityLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Perplexity chat: No results!");
if (result.choices.length === 0)
throw new Error("Perplexity chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`Perplexity::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(

View File

@ -1,4 +1,3 @@
const { chatPrompt } = require("../../chats");
const { const {
handleDefaultStreamResponseV2, handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses"); } = require("../../helpers/chat/responses");
@ -49,7 +48,7 @@ class TogetherAiLLM {
} }
  streamingEnabled() {
-   return "streamChat" in this && "streamGetChatCompletion" in this;
+   return "streamGetChatCompletion" in this;
  }
// Ensure the user set a value for the token limit // Ensure the user set a value for the token limit
@ -82,65 +81,6 @@ class TogetherAiLLM {
return { safe: true, reasons: [] }; return { safe: true, reasons: [] };
} }
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Together AI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Together AI chat: No results!");
if (result.choices.length === 0)
throw new Error("Together AI chat: No results length!");
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`TogetherAI::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) { async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model))) if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error( throw new Error(

View File

@ -0,0 +1,86 @@
const { toChunks } = require("../../helpers");
class CohereEmbedder {
constructor() {
if (!process.env.COHERE_API_KEY)
throw new Error("No Cohere API key was set.");
const { CohereClient } = require("cohere-ai");
const cohere = new CohereClient({
token: process.env.COHERE_API_KEY,
});
this.cohere = cohere;
this.model = process.env.EMBEDDING_MODEL_PREF || "embed-english-v3.0";
this.inputType = "search_document";
    // Limit of how many strings we can process in a single pass to stay within resource or network limits
this.maxConcurrentChunks = 96; // Cohere's limit per request is 96
this.embeddingMaxChunkLength = 1945; // https://docs.cohere.com/docs/embed-2 - assume a token is roughly 4 letters with some padding
}
async embedTextInput(textInput) {
this.inputType = "search_query";
const result = await this.embedChunks([textInput]);
return result?.[0] || [];
}
async embedChunks(textChunks = []) {
const embeddingRequests = [];
this.inputType = "search_document";
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
this.cohere
.embed({
texts: chunk,
model: this.model,
inputType: this.inputType,
})
.then((res) => {
resolve({ data: res.embeddings, error: null });
})
.catch((e) => {
e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
});
})
);
}
const { data = [], error = null } = await Promise.all(
embeddingRequests
).then((results) => {
const errors = results
.filter((res) => !!res.error)
.map((res) => res.error)
.flat();
if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return { data: [], error: Array.from(uniqueErrors).join(", ") };
}
return {
data: results.map((res) => res?.data || []).flat(),
error: null,
};
});
if (!!error) throw new Error(`Cohere Failed to embed: ${error}`);
return data.length > 0 ? data : null;
}
}
module.exports = {
CohereEmbedder,
};
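A minimal usage sketch for the CohereEmbedder above, assuming a valid COHERE_API_KEY and the cohere-ai dependency added in the lockfile change below; the key value and require path are placeholders.

// Minimal sketch only — the API key value and require path are placeholders.
process.env.COHERE_API_KEY = "co-..."; // supply a real Cohere key
process.env.EMBEDDING_MODEL_PREF = "embed-english-v3.0";

const { CohereEmbedder } = require("./cohere"); // hypothetical path to the class above

async function demoCohereEmbeddings() {
  const embedder = new CohereEmbedder();
  // Documents go out with inputType "search_document", batched at most 96 strings per request.
  const docVectors = await embedder.embedChunks([
    "AnythingLLM can now embed text with Cohere.",
    "Each request carries at most 96 strings.",
  ]);
  // A single query string switches inputType to "search_query".
  const queryVector = await embedder.embedTextInput("How are documents embedded?");
  console.log(docVectors?.length, queryVector.length);
}

demoCohereEmbeddings().catch(console.error);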

View File

@ -107,14 +107,21 @@ class NativeEmbedder {
    );
    let fetchResponse = await this.#fetchWithHost();
-   if (fetchResponse.pipeline !== null) return fetchResponse.pipeline;
+   if (fetchResponse.pipeline !== null) {
+     this.modelDownloaded = true;
+     return fetchResponse.pipeline;
+   }
    this.log(
      `Failed to download model from primary URL. Using fallback ${fetchResponse.retry}`
    );
    if (!!fetchResponse.retry)
      fetchResponse = await this.#fetchWithHost(fetchResponse.retry);
-   if (fetchResponse.pipeline !== null) return fetchResponse.pipeline;
+   if (fetchResponse.pipeline !== null) {
+     this.modelDownloaded = true;
+     return fetchResponse.pipeline;
+   }
    throw fetchResponse.error;
  }
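The change above only wraps the early returns so the embedder records that a model download succeeded. A condensed sketch of that flow, with a stand-in for the class's private #fetchWithHost helper:

// Sketch only — fetchWithHost stands in for the private #fetchWithHost method.
async function resolvePipeline(fetchWithHost, log) {
  let res = await fetchWithHost();
  if (res.pipeline !== null) return { pipeline: res.pipeline, modelDownloaded: true };
  log(`Failed to download model from primary URL. Using fallback ${res.retry}`);
  if (res.retry) res = await fetchWithHost(res.retry);
  if (res.pipeline !== null) return { pipeline: res.pipeline, modelDownloaded: true };
  throw res.error; // both the primary and fallback hosts failed
}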

View File

@ -23,6 +23,7 @@ async function resetMemory(
sources: [], sources: [],
close: true, close: true,
error: false, error: false,
action: "reset_chat",
}; };
} }

View File

@ -131,7 +131,11 @@ async function streamChatWithForEmbed(
  // If in query mode and no sources are found, do not
  // let the LLM try to hallucinate a response or use general knowledge
- if (chatMode === "query" && sources.length === 0) {
+ if (
+   chatMode === "query" &&
+   sources.length === 0 &&
+   pinnedDocIdentifiers.length === 0
+ ) {
    writeResponseChunk(response, {
      id: uuid,
      type: "textResponse",

View File

@ -140,9 +140,13 @@ async function chatWithWorkspace(
  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];
- // If in query mode and no sources are found, do not
+ // If in query mode and no sources are found from the vector search and no pinned documents, do not
  // let the LLM try to hallucinate a response or use general knowledge and exit early
- if (chatMode === "query" && sources.length === 0) {
+ if (
+   chatMode === "query" &&
+   vectorSearchResults.sources.length === 0 &&
+   pinnedDocIdentifiers.length === 0
+ ) {
    return {
      id: uuid,
      type: "textResponse",

View File

@ -160,9 +160,13 @@ async function streamChatWithWorkspace(
  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];
- // If in query mode and no sources are found, do not
+ // If in query mode and no sources are found from the vector search and no pinned documents, do not
  // let the LLM try to hallucinate a response or use general knowledge and exit early
- if (chatMode === "query" && sources.length === 0) {
+ if (
+   chatMode === "query" &&
+   sources.length === 0 &&
+   pinnedDocIdentifiers.length === 0
+ ) {
    writeResponseChunk(response, {
      id: uuid,
      type: "textResponse",

View File

@ -14,6 +14,7 @@ const SUPPORT_CUSTOM_MODELS = [
"perplexity", "perplexity",
"openrouter", "openrouter",
"lmstudio", "lmstudio",
"koboldcpp",
]; ];
async function getCustomModels(provider = "", apiKey = null, basePath = null) { async function getCustomModels(provider = "", apiKey = null, basePath = null) {
@ -39,6 +40,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return await getOpenRouterModels(); return await getOpenRouterModels();
case "lmstudio": case "lmstudio":
return await getLMStudioModels(basePath); return await getLMStudioModels(basePath);
case "koboldcpp":
return await getKoboldCPPModels(basePath);
default: default:
return { models: [], error: "Invalid provider for custom models" }; return { models: [], error: "Invalid provider for custom models" };
} }
@ -171,6 +174,28 @@ async function getLMStudioModels(basePath = null) {
} }
} }
async function getKoboldCPPModels(basePath = null) {
try {
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
      baseURL: basePath || process.env.KOBOLD_CPP_BASE_PATH,
apiKey: null,
});
const models = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`KoboldCPP:listModels`, e.message);
return [];
});
return { models, error: null };
} catch (e) {
console.error(`KoboldCPP:getKoboldCPPModels`, e.message);
return { models: [], error: "Could not fetch KoboldCPP Models" };
}
}
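A short sketch of calling the new "koboldcpp" branch of getCustomModels; the require path and endpoint URL are assumptions.

// Sketch only — require path and endpoint URL are assumed.
const { getCustomModels } = require("../helpers/customModels");

(async () => {
  const { models, error } = await getCustomModels(
    "koboldcpp",
    null, // KoboldCPP needs no API key
    "http://127.0.0.1:5001/v1" // assumed OpenAI-compatible KoboldCPP endpoint
  );
  if (error) console.error(error);
  else console.log(models.map((m) => m.id));
})();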
async function ollamaAIModels(basePath = null) { async function ollamaAIModels(basePath = null) {
let url; let url;
try { try {

View File

@ -77,6 +77,12 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "groq": case "groq":
const { GroqLLM } = require("../AiProviders/groq"); const { GroqLLM } = require("../AiProviders/groq");
return new GroqLLM(embedder, model); return new GroqLLM(embedder, model);
case "koboldcpp":
const { KoboldCPPLLM } = require("../AiProviders/koboldCPP");
return new KoboldCPPLLM(embedder, model);
case "cohere":
const { CohereLLM } = require("../AiProviders/cohere");
return new CohereLLM(embedder, model);
case "generic-openai": case "generic-openai":
const { GenericOpenAiLLM } = require("../AiProviders/genericOpenAi"); const { GenericOpenAiLLM } = require("../AiProviders/genericOpenAi");
return new GenericOpenAiLLM(embedder, model); return new GenericOpenAiLLM(embedder, model);
@ -110,6 +116,9 @@ function getEmbeddingEngineSelection() {
case "lmstudio": case "lmstudio":
const { LMStudioEmbedder } = require("../EmbeddingEngines/lmstudio"); const { LMStudioEmbedder } = require("../EmbeddingEngines/lmstudio");
return new LMStudioEmbedder(); return new LMStudioEmbedder();
case "cohere":
const { CohereEmbedder } = require("../EmbeddingEngines/cohere");
return new CohereEmbedder();
default: default:
return null; return null;
} }
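A sketch of selecting the two providers wired in above; the require path and env values are assumptions, and in practice these env vars are written by the settings update flow covered further below.

// Sketch only — require path and env values are assumed.
const { getLLMProvider, getEmbeddingEngineSelection } = require("../helpers");

process.env.EMBEDDING_ENGINE = "cohere"; // assumed env key driving the embedder switch
process.env.COHERE_API_KEY = "co-...";
process.env.KOBOLD_CPP_BASE_PATH = "http://127.0.0.1:5001/v1";
process.env.KOBOLD_CPP_MODEL_PREF = "koboldcpp/any-loaded-model";
process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT = "4096";

const embedder = getEmbeddingEngineSelection(); // -> CohereEmbedder
const llm = getLLMProvider({ provider: "koboldcpp" }); // -> KoboldCPPLLM
console.log(embedder.constructor.name, llm.constructor.name);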

View File

@ -132,6 +132,20 @@ const KEY_MAPPING = {
checks: [nonZero], checks: [nonZero],
}, },
// KoboldCPP Settings
KoboldCPPBasePath: {
envKey: "KOBOLD_CPP_BASE_PATH",
checks: [isNotEmpty, isValidURL],
},
KoboldCPPModelPref: {
envKey: "KOBOLD_CPP_MODEL_PREF",
checks: [isNotEmpty],
},
KoboldCPPTokenLimit: {
envKey: "KOBOLD_CPP_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
// Generic OpenAI InferenceSettings // Generic OpenAI InferenceSettings
GenericOpenAiBasePath: { GenericOpenAiBasePath: {
envKey: "GENERIC_OPEN_AI_BASE_PATH", envKey: "GENERIC_OPEN_AI_BASE_PATH",
@ -290,6 +304,16 @@ const KEY_MAPPING = {
checks: [isNotEmpty], checks: [isNotEmpty],
}, },
// Cohere Options
CohereApiKey: {
envKey: "COHERE_API_KEY",
checks: [isNotEmpty],
},
CohereModelPref: {
envKey: "COHERE_MODEL_PREF",
checks: [isNotEmpty],
},
// Whisper (transcription) providers // Whisper (transcription) providers
WhisperProvider: { WhisperProvider: {
envKey: "WHISPER_PROVIDER", envKey: "WHISPER_PROVIDER",
@ -393,6 +417,8 @@ function supportedLLM(input = "") {
"perplexity", "perplexity",
"openrouter", "openrouter",
"groq", "groq",
"koboldcpp",
"cohere",
"generic-openai", "generic-openai",
].includes(input); ].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`; return validSelection ? null : `${input} is not a valid LLM provider.`;
@ -434,6 +460,7 @@ function supportedEmbeddingModel(input = "") {
"native", "native",
"ollama", "ollama",
"lmstudio", "lmstudio",
"cohere",
]; ];
return supported.includes(input) return supported.includes(input)
? null ? null
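For orientation, the shape of a settings payload that would exercise the new KoboldCPP and Cohere entries in KEY_MAPPING; keys not shown in the hunk above (LLMProvider, EmbeddingEngine) and all values are assumptions.

// Sketch only — key names outside the hunk above and all values are assumed.
const newSettings = {
  LLMProvider: "koboldcpp",
  KoboldCPPBasePath: "http://127.0.0.1:5001/v1", // isNotEmpty + isValidURL
  KoboldCPPModelPref: "koboldcpp/any-loaded-model", // isNotEmpty
  KoboldCPPTokenLimit: 4096, // nonZero
  EmbeddingEngine: "cohere",
  CohereApiKey: "co-...", // isNotEmpty
  CohereModelPref: "command-r", // isNotEmpty
};
// Each key resolves to its envKey (e.g. KOBOLD_CPP_BASE_PATH) and its checks
// run before the value is persisted to the process env.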

View File

@ -1817,6 +1817,17 @@ cmake-js@^7.2.1:
which "^2.0.2" which "^2.0.2"
yargs "^17.7.2" yargs "^17.7.2"
cohere-ai@^7.9.5:
version "7.9.5"
resolved "https://registry.yarnpkg.com/cohere-ai/-/cohere-ai-7.9.5.tgz#05a592fe19decb8692d1b19d93ac835d7f816b8b"
integrity sha512-tr8LUR3Q46agFpfEwaYwzYO4qAuN0/R/8YroG4bc86LadOacBAabctZUq0zfCdLiL7gB4yWJs4QCzfpRH3rQuw==
dependencies:
form-data "4.0.0"
js-base64 "3.7.2"
node-fetch "2.7.0"
qs "6.11.2"
url-join "4.0.1"
color-convert@^1.9.3: color-convert@^1.9.3:
version "1.9.3" version "1.9.3"
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
@ -2846,19 +2857,19 @@ form-data-encoder@1.7.2:
resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040" resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040"
integrity sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A== integrity sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==
- form-data@^3.0.0:
-   version "3.0.1"
-   resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f"
-   integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==
+ form-data@4.0.0, form-data@^4.0.0:
+   version "4.0.0"
+   resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
+   integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==
    dependencies:
      asynckit "^0.4.0"
      combined-stream "^1.0.8"
      mime-types "^2.1.12"

- form-data@^4.0.0:
-   version "4.0.0"
-   resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
-   integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==
+ form-data@^3.0.0:
+   version "3.0.1"
+   resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f"
+   integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==
    dependencies:
      asynckit "^0.4.0"
      combined-stream "^1.0.8"
@ -3652,6 +3663,11 @@ joi@^17.11.0:
"@sideway/formula" "^3.0.1" "@sideway/formula" "^3.0.1"
"@sideway/pinpoint" "^2.0.0" "@sideway/pinpoint" "^2.0.0"
js-base64@3.7.2:
version "3.7.2"
resolved "https://registry.yarnpkg.com/js-base64/-/js-base64-3.7.2.tgz#816d11d81a8aff241603d19ce5761e13e41d7745"
integrity sha512-NnRs6dsyqUXejqk/yv2aiXlAvOs56sLkX6nUdeaNezI5LFFLlsZjOThmwnrcwh5ZZRwZlCMnVAY3CvhIhoVEKQ==
js-tiktoken@^1.0.11, js-tiktoken@^1.0.7, js-tiktoken@^1.0.8: js-tiktoken@^1.0.11, js-tiktoken@^1.0.7, js-tiktoken@^1.0.8:
version "1.0.11" version "1.0.11"
resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.11.tgz#d7d707b849f703841112660d9d55169424a35344" resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.11.tgz#d7d707b849f703841112660d9d55169424a35344"
@ -4324,7 +4340,7 @@ node-domexception@1.0.0:
resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5"
integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==
- node-fetch@^2.6.1, node-fetch@^2.6.12, node-fetch@^2.6.7, node-fetch@^2.6.9:
+ node-fetch@2.7.0, node-fetch@^2.6.1, node-fetch@^2.6.12, node-fetch@^2.6.7, node-fetch@^2.6.9:
version "2.7.0" version "2.7.0"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d"
integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==
@ -4947,6 +4963,13 @@ qs@6.11.0:
dependencies: dependencies:
side-channel "^1.0.4" side-channel "^1.0.4"
qs@6.11.2:
version "6.11.2"
resolved "https://registry.yarnpkg.com/qs/-/qs-6.11.2.tgz#64bea51f12c1f5da1bc01496f48ffcff7c69d7d9"
integrity sha512-tDNIz22aBzCDxLtVH++VnTfzxlfeK5CbqohpSqpJgj1Wg/cQbStNAz3NuqCs5vV+pjBsK4x4pN9HlVh7rcYRiA==
dependencies:
side-channel "^1.0.4"
qs@^6.7.0: qs@^6.7.0:
version "6.12.1" version "6.12.1"
resolved "https://registry.yarnpkg.com/qs/-/qs-6.12.1.tgz#39422111ca7cbdb70425541cba20c7d7b216599a" resolved "https://registry.yarnpkg.com/qs/-/qs-6.12.1.tgz#39422111ca7cbdb70425541cba20c7d7b216599a"
@ -5862,7 +5885,7 @@ uri-js@^4.2.2, uri-js@^4.4.1:
dependencies: dependencies:
punycode "^2.1.0" punycode "^2.1.0"
- url-join@^4.0.1:
+ url-join@4.0.1, url-join@^4.0.1:
version "4.0.1" version "4.0.1"
resolved "https://registry.yarnpkg.com/url-join/-/url-join-4.0.1.tgz#b642e21a2646808ffa178c4c5fda39844e12cde7" resolved "https://registry.yarnpkg.com/url-join/-/url-join-4.0.1.tgz#b642e21a2646808ffa178c4c5fda39844e12cde7"
integrity sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA== integrity sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==