diff --git a/README.md b/README.md index a56d24ac1..d5e4c3013 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@

- AnythingLLM: The all-in-one AI app you were looking for.
+ AnythingLLM: The all-in-one AI app you were looking for.
Chat with your docs, use AI Agents, hyper-configurable, multi-user, & no frustrating setup required.
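The collector changes below (Confluence, GithubRepo, and YoutubeTranscript) converge on the same guarded, recursive output-folder creation before documents are written. A minimal sketch of that pattern; the documentsFolder resolution here is an illustrative stand-in, not the collectors' actual constant:

const fs = require("fs");
const path = require("path");

// Illustrative stand-in; the Confluence loader previously resolved this against server/storage/documents.
const documentsFolder = path.resolve(__dirname, "documents");

function ensureOutputFolder(outFolder) {
  const outFolderPath = path.resolve(documentsFolder, outFolder);
  // { recursive: true } also creates any missing parent directories and does not
  // throw when the folder already exists, so repeated collector runs stay idempotent.
  if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true });
  return outFolderPath;
}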

diff --git a/collector/utils/WhisperProviders/OpenAiWhisper.js b/collector/utils/WhisperProviders/OpenAiWhisper.js index 8460ffea0..fc163eddf 100644 --- a/collector/utils/WhisperProviders/OpenAiWhisper.js +++ b/collector/utils/WhisperProviders/OpenAiWhisper.js @@ -22,7 +22,6 @@ class OpenAiWhisper { .create({ file: fs.createReadStream(fullFilePath), model: this.model, - model: "whisper-1", response_format: "text", temperature: this.temperature, }) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 1ea642e1a..351624406 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -66,11 +66,8 @@ async function loadConfluence({ pageUrl, username, accessToken }) { const outFolder = slugify( `${subdomain}-confluence-${v4().slice(0, 4)}` ).toLowerCase(); - const outFolderPath = path.resolve( - __dirname, - `../../../../server/storage/documents/${outFolder}` - ); - fs.mkdirSync(outFolderPath); + const outFolderPath = path.resolve(documentsFolder, outFolder); + if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true }); docs.forEach((doc) => { const data = { diff --git a/collector/utils/extensions/GithubRepo/index.js b/collector/utils/extensions/GithubRepo/index.js index 51f39d895..a87445dad 100644 --- a/collector/utils/extensions/GithubRepo/index.js +++ b/collector/utils/extensions/GithubRepo/index.js @@ -32,7 +32,7 @@ async function loadGithubRepo(args) { `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}` ).toLowerCase(); const outFolderPath = path.resolve(documentsFolder, outFolder); - fs.mkdirSync(outFolderPath); + if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true }); for (const doc of docs) { if (!doc.pageContent) continue; diff --git a/collector/utils/extensions/YoutubeTranscript/index.js b/collector/utils/extensions/YoutubeTranscript/index.js index b1622870c..8c541bd31 100644 --- a/collector/utils/extensions/YoutubeTranscript/index.js +++ b/collector/utils/extensions/YoutubeTranscript/index.js @@ -9,9 +9,8 @@ const { YoutubeLoader } = require("./YoutubeLoader"); function validYoutubeVideoUrl(link) { const UrlPattern = require("url-pattern"); const opts = new URL(link); - const url = `${opts.protocol}//${opts.host}${opts.pathname}${ - opts.searchParams.has("v") ? `?v=${opts.searchParams.get("v")}` : "" - }`; + const url = `${opts.protocol}//${opts.host}${opts.pathname}${opts.searchParams.has("v") ? 
`?v=${opts.searchParams.get("v")}` : "" + }`; const shortPatternMatch = new UrlPattern( "https\\://(www.)youtu.be/(:videoId)" @@ -68,7 +67,7 @@ async function loadYouTubeTranscript({ url }) { `${metadata.author} YouTube transcripts` ).toLowerCase(); const outFolderPath = path.resolve(documentsFolder, outFolder); - if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath); + if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true }); const data = { id: v4(), diff --git a/docker/.env.example b/docker/.env.example index 3a0a68c52..e10ace026 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -66,12 +66,21 @@ GID='1000' # GROQ_API_KEY=gsk_abcxyz # GROQ_MODEL_PREF=llama3-8b-8192 +# LLM_PROVIDER='koboldcpp' +# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1' +# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S' +# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096 + # LLM_PROVIDER='generic-openai' # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1' # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo' # GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096 # GENERIC_OPEN_AI_API_KEY=sk-123abc +# LLM_PROVIDER='cohere' +# COHERE_API_KEY= +# COHERE_MODEL_PREF='command-r' + ########################################### ######## Embedding API SElECTION ########## ########################################### @@ -100,6 +109,10 @@ GID='1000' # EMBEDDING_MODEL_PREF='nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_0.gguf' # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192 +# EMBEDDING_ENGINE='cohere' +# COHERE_API_KEY= +# EMBEDDING_MODEL_PREF='embed-english-v3.0' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/embed/src/utils/chat/markdown.js b/embed/src/utils/chat/markdown.js index d82c4fa7c..bb21e6925 100644 --- a/embed/src/utils/chat/markdown.js +++ b/embed/src/utils/chat/markdown.js @@ -4,7 +4,7 @@ import { staticHljs as hljs } from "./hljs"; import { v4 } from "uuid"; const markdown = markdownIt({ - html: true, + html: false, typographer: true, highlight: function (code, lang) { const uuid = v4(); diff --git a/frontend/src/components/EmbeddingSelection/CohereOptions/index.jsx b/frontend/src/components/EmbeddingSelection/CohereOptions/index.jsx new file mode 100644 index 000000000..7cb092374 --- /dev/null +++ b/frontend/src/components/EmbeddingSelection/CohereOptions/index.jsx @@ -0,0 +1,55 @@ +export default function CohereEmbeddingOptions({ settings }) { + return ( +
+
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/components/LLMSelection/CohereAiOptions/index.jsx b/frontend/src/components/LLMSelection/CohereAiOptions/index.jsx new file mode 100644 index 000000000..a20c8b81f --- /dev/null +++ b/frontend/src/components/LLMSelection/CohereAiOptions/index.jsx @@ -0,0 +1,49 @@ +export default function CohereAiOptions({ settings }) { + return ( +
+
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/components/LLMSelection/KoboldCPPOptions/index.jsx b/frontend/src/components/LLMSelection/KoboldCPPOptions/index.jsx new file mode 100644 index 000000000..7e5e20aef --- /dev/null +++ b/frontend/src/components/LLMSelection/KoboldCPPOptions/index.jsx @@ -0,0 +1,112 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; + +export default function KoboldCPPOptions({ settings }) { + const [basePathValue, setBasePathValue] = useState( + settings?.KoboldCPPBasePath + ); + const [basePath, setBasePath] = useState(settings?.KoboldCPPBasePath); + + return ( +
+
+ + setBasePathValue(e.target.value)} + onBlur={() => setBasePath(basePathValue)} + /> +
+ +
+ + e.target.blur()} + defaultValue={settings?.KoboldCPPTokenLimit} + required={true} + autoComplete="off" + /> +
+
+ ); +} + +function KoboldCPPModelSelection({ settings, basePath = null }) { + const [customModels, setCustomModels] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + if (!basePath || !basePath.includes("/v1")) { + setCustomModels([]); + setLoading(false); + return; + } + setLoading(true); + const { models } = await System.customModels("koboldcpp", null, basePath); + setCustomModels(models || []); + setLoading(false); + } + findCustomModels(); + }, [basePath]); + + if (loading || customModels.length === 0) { + return ( +
+ + +
+ ); + } + + return ( +
+ + +
+ ); +} diff --git a/frontend/src/components/Sidebar/ActiveWorkspaces/index.jsx b/frontend/src/components/Sidebar/ActiveWorkspaces/index.jsx index 0a867e889..cf059b2fb 100644 --- a/frontend/src/components/Sidebar/ActiveWorkspaces/index.jsx +++ b/frontend/src/components/Sidebar/ActiveWorkspaces/index.jsx @@ -150,9 +150,13 @@ export default function ActiveWorkspaces() { handleGearMouseEnter(workspace.id)} onMouseLeave={() => handleGearMouseLeave(workspace.id)} className="rounded-md flex items-center justify-center text-[#A7A8A9] hover:text-white ml-auto" diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Chartable/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Chartable/index.jsx index 8217fe95b..6a6e6b130 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Chartable/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Chartable/index.jsx @@ -107,7 +107,7 @@ export function Chartable({ props, workspace }) { ); case "line": return ( -
+

{title}

-
+
{renderChart()}
-
+
{renderChart()}
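Both markdown.js edits in this patch (embed/src/utils/chat/markdown.js above and frontend/src/utils/chat/markdown.js further down) flip markdown-it's html option to false so raw HTML in chat output is escaped rather than injected into the page. A small sketch of the behavioral difference, assuming only the markdown-it package:

const markdownIt = require("markdown-it");

// html: true passes raw tags straight through to the rendered output.
const permissive = markdownIt({ html: true });
// html: false (as set in this patch) escapes raw tags instead of rendering them.
const hardened = markdownIt({ html: false });

const payload = 'Hello <img src=x onerror="alert(1)">';
console.log(permissive.render(payload)); // keeps the <img ...> tag intact
console.log(hardened.render(payload));   // roughly: <p>Hello &lt;img src=x onerror=&quot;alert(1)&quot;&gt;</p>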
diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js index fb35230c9..dfd468111 100644 --- a/frontend/src/hooks/useGetProvidersModels.js +++ b/frontend/src/hooks/useGetProvidersModels.js @@ -26,6 +26,14 @@ const PROVIDER_DEFAULT_MODELS = { "gemma-7b-it", ], native: [], + cohere: [ + "command-r", + "command-r-plus", + "command", + "command-light", + "command-nightly", + "command-light-nightly", + ], }; // For togetherAi, which has a large model list - we subgroup the options diff --git a/frontend/src/media/llmprovider/cohere.png b/frontend/src/media/llmprovider/cohere.png new file mode 100644 index 000000000..5ec86e49b Binary files /dev/null and b/frontend/src/media/llmprovider/cohere.png differ diff --git a/frontend/src/media/llmprovider/koboldcpp.png b/frontend/src/media/llmprovider/koboldcpp.png new file mode 100644 index 000000000..5724f04ab Binary files /dev/null and b/frontend/src/media/llmprovider/koboldcpp.png differ diff --git a/frontend/src/pages/Admin/Logging/index.jsx b/frontend/src/pages/Admin/Logging/index.jsx index 69a81ab56..498247849 100644 --- a/frontend/src/pages/Admin/Logging/index.jsx +++ b/frontend/src/pages/Admin/Logging/index.jsx @@ -9,6 +9,22 @@ import showToast from "@/utils/toast"; import CTAButton from "@/components/lib/CTAButton"; export default function AdminLogs() { + const query = useQuery(); + const [loading, setLoading] = useState(true); + const [logs, setLogs] = useState([]); + const [offset, setOffset] = useState(Number(query.get("offset") || 0)); + const [canNext, setCanNext] = useState(false); + + useEffect(() => { + async function fetchLogs() { + const { logs: _logs, hasPages = false } = await System.eventLogs(offset); + setLogs(_logs); + setCanNext(hasPages); + setLoading(false); + } + fetchLogs(); + }, [offset]); + const handleResetLogs = async () => { if ( !window.confirm( @@ -19,13 +35,22 @@ export default function AdminLogs() { const { success, error } = await System.clearEventLogs(); if (success) { showToast("Event logs cleared successfully.", "success"); - setTimeout(() => { - window.location.reload(); - }, 1000); + setLogs([]); + setCanNext(false); + setOffset(0); } else { showToast(`Failed to clear logs: ${error}`, "error"); } }; + + const handlePrevious = () => { + setOffset(Math.max(offset - 1, 0)); + }; + + const handleNext = () => { + setOffset(offset + 1); + }; + return (
@@ -53,37 +78,28 @@ export default function AdminLogs() { Clear Event Logs
- +
); } -function LogsContainer() { - const query = useQuery(); - const [loading, setLoading] = useState(true); - const [logs, setLogs] = useState([]); - const [offset, setOffset] = useState(Number(query.get("offset") || 0)); - const [canNext, setCanNext] = useState(false); - - const handlePrevious = () => { - setOffset(Math.max(offset - 1, 0)); - }; - const handleNext = () => { - setOffset(offset + 1); - }; - - useEffect(() => { - async function fetchLogs() { - const { logs: _logs, hasPages = false } = await System.eventLogs(offset); - setLogs(_logs); - setCanNext(hasPages); - setLoading(false); - } - fetchLogs(); - }, [offset]); - +function LogsContainer({ + loading, + logs, + offset, + canNext, + handleNext, + handlePrevious, +}) { if (loading) { return ( , + description: "Run powerful embedding models from Cohere.", + }, ]; export default function GeneralEmbeddingPreference() { diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index 64526ba92..941f02cb3 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -17,6 +17,8 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import GroqLogo from "@/media/llmprovider/groq.png"; +import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png"; +import CohereLogo from "@/media/llmprovider/cohere.png"; import PreLoader from "@/components/Preloader"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -32,10 +34,12 @@ import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions"; import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; +import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; import CTAButton from "@/components/lib/CTAButton"; +import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions"; export const AVAILABLE_LLM_PROVIDERS = [ { @@ -150,6 +154,26 @@ export const AVAILABLE_LLM_PROVIDERS = [ "The fastest LLM inferencing available for real-time AI applications.", requiredConfig: ["GroqApiKey"], }, + { + name: "KoboldCPP", + value: "koboldcpp", + logo: KoboldCPPLogo, + options: (settings) => , + description: "Run local LLMs using koboldcpp.", + requiredConfig: [ + "KoboldCPPModelPref", + "KoboldCPPBasePath", + "KoboldCPPTokenLimit", + ], + }, + { + name: "Cohere", + value: "cohere", + logo: CohereLogo, + options: (settings) => , + description: "Run Cohere's powerful Command models.", + requiredConfig: ["CohereApiKey"], + }, { name: "Generic OpenAI", value: "generic-openai", diff --git a/frontend/src/pages/OnboardingFlow/Steps/CustomLogo/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/CustomLogo/index.jsx deleted file mode 100644 index 6a79f854a..000000000 --- a/frontend/src/pages/OnboardingFlow/Steps/CustomLogo/index.jsx +++ /dev/null @@ -1,140 +0,0 @@ -import useLogo from "@/hooks/useLogo"; -import System from "@/models/system"; -import showToast from "@/utils/toast"; -import { Plus } 
from "@phosphor-icons/react"; -import React, { useState, useEffect } from "react"; -import AnythingLLM from "@/media/logo/anything-llm.png"; -import paths from "@/utils/paths"; -import { useNavigate } from "react-router-dom"; - -const TITLE = "Custom Logo"; -const DESCRIPTION = - "Upload your custom logo to make your chatbot yours. Optional."; - -export default function CustomLogo({ setHeader, setForwardBtn, setBackBtn }) { - const navigate = useNavigate(); - function handleForward() { - navigate(paths.onboarding.userSetup()); - } - - function handleBack() { - navigate(paths.onboarding.llmPreference()); - } - - useEffect(() => { - setHeader({ title: TITLE, description: DESCRIPTION }); - setForwardBtn({ showing: true, disabled: false, onClick: handleForward }); - setBackBtn({ showing: true, disabled: false, onClick: handleBack }); - }, []); - - const { logo: _initLogo, setLogo: _setLogo } = useLogo(); - const [logo, setLogo] = useState(""); - const [isDefaultLogo, setIsDefaultLogo] = useState(true); - - useEffect(() => { - async function logoInit() { - setLogo(_initLogo || ""); - const _isDefaultLogo = await System.isDefaultLogo(); - setIsDefaultLogo(_isDefaultLogo); - } - logoInit(); - }, [_initLogo]); - - const handleFileUpload = async (event) => { - const file = event.target.files[0]; - if (!file) return false; - - const objectURL = URL.createObjectURL(file); - setLogo(objectURL); - - const formData = new FormData(); - formData.append("logo", file); - const { success, error } = await System.uploadLogo(formData); - if (!success) { - showToast(`Failed to upload logo: ${error}`, "error"); - setLogo(_initLogo); - return; - } - - const logoURL = await System.fetchLogo(); - _setLogo(logoURL); - setIsDefaultLogo(false); - }; - - const handleRemoveLogo = async () => { - setLogo(""); - setIsDefaultLogo(true); - - const { success, error } = await System.removeCustomLogo(); - if (!success) { - console.error("Failed to remove logo:", error); - showToast(`Failed to remove logo: ${error}`, "error"); - const logoURL = await System.fetchLogo(); - setLogo(logoURL); - setIsDefaultLogo(false); - return; - } - - const logoURL = await System.fetchLogo(); - _setLogo(logoURL); - }; - - return ( -
-
- {isDefaultLogo ? ( - - ) : ( -
- (e.target.src = AnythingLLM)} - /> -
- )} - {!isDefaultLogo ? ( - - ) : ( - - )} -
-
- ); -} diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index 548272fe0..6e8a18974 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -15,6 +15,8 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import GroqLogo from "@/media/llmprovider/groq.png"; +import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png"; +import CohereLogo from "@/media/llmprovider/cohere.png"; import ZillizLogo from "@/media/vectordbs/zilliz.png"; import AstraDBLogo from "@/media/vectordbs/astraDB.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; @@ -137,6 +139,13 @@ export const LLM_SELECTION_PRIVACY = { ], logo: GroqLogo, }, + koboldcpp: { + name: "KoboldCPP", + description: [ + "Your model and chats are only accessible on the server running KoboldCPP", + ], + logo: KoboldCPPLogo, + }, "generic-openai": { name: "Generic OpenAI compatible service", description: [ @@ -144,6 +153,13 @@ export const LLM_SELECTION_PRIVACY = { ], logo: GenericOpenAiLogo, }, + cohere: { + name: "Cohere", + description: [ + "Data is shared according to the terms of service of cohere.com and your localities privacy laws.", + ], + logo: CohereLogo, + }, }; export const VECTOR_DB_PRIVACY = { @@ -252,6 +268,13 @@ export const EMBEDDING_ENGINE_PRIVACY = { ], logo: LMStudioLogo, }, + cohere: { + name: "Cohere", + description: [ + "Data is shared according to the terms of service of cohere.com and your localities privacy laws.", + ], + logo: CohereLogo, + }, }; export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index d7fa55cc5..5885cc215 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -14,6 +14,8 @@ import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import GroqLogo from "@/media/llmprovider/groq.png"; +import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png"; +import CohereLogo from "@/media/llmprovider/cohere.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; @@ -28,11 +30,14 @@ import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions"; import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; +import CohereAiOptions from "@/components/LLMSelection/CohereAiOptions"; + import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; import paths from "@/utils/paths"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; +import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions"; const TITLE = "LLM Preference"; const DESCRIPTION = @@ -97,6 +102,13 @@ 
const LLMS = [ options: (settings) => , description: "Run LLMs locally on your own machine.", }, + { + name: "KoboldCPP", + value: "koboldcpp", + logo: KoboldCPPLogo, + options: (settings) => , + description: "Run local LLMs using koboldcpp.", + }, { name: "Together AI", value: "togetherai", @@ -134,6 +146,13 @@ const LLMS = [ description: "The fastest LLM inferencing available for real-time AI applications.", }, + { + name: "Cohere", + value: "cohere", + logo: CohereLogo, + options: (settings) => , + description: "Run Cohere's powerful Command models.", + }, { name: "Generic OpenAI", value: "generic-openai", @@ -200,7 +219,7 @@ export default function LLMPreference({ showToast(`Failed to save LLM settings: ${error}`, "error"); return; } - navigate(paths.onboarding.customLogo()); + navigate(paths.onboarding.userSetup()); }; useEffect(() => { diff --git a/frontend/src/pages/OnboardingFlow/Steps/UserSetup/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/UserSetup/index.jsx index 2e619e395..6cc41428a 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/UserSetup/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/UserSetup/index.jsx @@ -29,7 +29,7 @@ export default function UserSetup({ setHeader, setForwardBtn, setBackBtn }) { } function handleBack() { - navigate(paths.onboarding.customLogo()); + navigate(paths.onboarding.llmPreference()); } useEffect(() => { diff --git a/frontend/src/pages/OnboardingFlow/Steps/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/index.jsx index f223c0268..903395a77 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/index.jsx @@ -3,7 +3,6 @@ import { useState } from "react"; import { isMobile } from "react-device-detect"; import Home from "./Home"; import LLMPreference from "./LLMPreference"; -import CustomLogo from "./CustomLogo"; import UserSetup from "./UserSetup"; import DataHandling from "./DataHandling"; import Survey from "./Survey"; @@ -12,7 +11,6 @@ import CreateWorkspace from "./CreateWorkspace"; const OnboardingSteps = { home: Home, "llm-preference": LLMPreference, - "custom-logo": CustomLogo, "user-setup": UserSetup, "data-handling": DataHandling, survey: Survey, diff --git a/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx b/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx index b6d5b84a6..5e4053f01 100644 --- a/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/GeneralAppearance/index.jsx @@ -36,7 +36,6 @@ export default function GeneralInfo({ slug }) { ); if (!!updatedWorkspace) { showToast("Workspace updated!", "success", { clear: true }); - setTimeout(() => window.location.reload(), 1_500); } else { showToast(`Error: ${message}`, "error", { clear: true }); } diff --git a/frontend/src/utils/chat/index.js b/frontend/src/utils/chat/index.js index a2b18c7f4..c5730dbe0 100644 --- a/frontend/src/utils/chat/index.js +++ b/frontend/src/utils/chat/index.js @@ -17,6 +17,7 @@ export default function handleChat( error, close, chatId = null, + action = null, } = chatResult; if (type === "abort" || type === "statusResponse") { @@ -132,6 +133,12 @@ export default function handleChat( setChatHistory([..._chatHistory]); setLoadingResponse(false); } + + // Action Handling via special 'action' attribute on response. + if (action === "reset_chat") { + // Chat was reset, keep reset message and clear everything else. 
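+      // _chatHistory.pop() returns the final entry (the reset confirmation message), so the new history contains only that single message.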
+ setChatHistory([_chatHistory.pop()]); + } } export function chatPrompt(workspace) { diff --git a/frontend/src/utils/chat/markdown.js b/frontend/src/utils/chat/markdown.js index ff4af77bc..ae1db23cb 100644 --- a/frontend/src/utils/chat/markdown.js +++ b/frontend/src/utils/chat/markdown.js @@ -5,7 +5,7 @@ import "highlight.js/styles/github-dark-dimmed.min.css"; import { v4 } from "uuid"; const markdown = markdownIt({ - html: true, + html: false, typographer: true, highlight: function (code, lang) { const uuid = v4(); diff --git a/frontend/src/utils/paths.js b/frontend/src/utils/paths.js index 339ecf439..4dc4d5285 100644 --- a/frontend/src/utils/paths.js +++ b/frontend/src/utils/paths.js @@ -23,9 +23,6 @@ export default { vectorDatabase: () => { return "/onboarding/vector-database"; }, - customLogo: () => { - return "/onboarding/custom-logo"; - }, userSetup: () => { return "/onboarding/user-setup"; }, diff --git a/server/.env.example b/server/.env.example index 244d37e54..c8f05340a 100644 --- a/server/.env.example +++ b/server/.env.example @@ -63,12 +63,21 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # GROQ_API_KEY=gsk_abcxyz # GROQ_MODEL_PREF=llama3-8b-8192 +# LLM_PROVIDER='koboldcpp' +# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1' +# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S' +# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096 + # LLM_PROVIDER='generic-openai' # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1' # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo' # GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096 # GENERIC_OPEN_AI_API_KEY=sk-123abc +# LLM_PROVIDER='cohere' +# COHERE_API_KEY= +# COHERE_MODEL_PREF='command-r' + ########################################### ######## Embedding API SElECTION ########## ########################################### @@ -97,6 +106,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # EMBEDDING_MODEL_PREF='nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_0.gguf' # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192 +# EMBEDDING_ENGINE='cohere' +# COHERE_API_KEY= +# EMBEDDING_MODEL_PREF='embed-english-v3.0' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/server/endpoints/system.js b/server/endpoints/system.js index 497a6d703..de47bedd4 100644 --- a/server/endpoints/system.js +++ b/server/endpoints/system.js @@ -918,7 +918,7 @@ function systemEndpoints(app) { [validatedRequest, flexUserRoleValid([ROLES.admin])], async (request, response) => { try { - const { offset = 0, limit = 20 } = reqBody(request); + const { offset = 0, limit = 10 } = reqBody(request); const logs = await EventLogs.whereWithData({}, limit, offset * limit, { id: "desc", }); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index bf5da52f1..96ee04b10 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -363,11 +363,20 @@ const SystemSettings = { HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY, HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT, + // KoboldCPP Keys + KoboldCPPModelPref: process.env.KOBOLD_CPP_MODEL_PREF, + KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH, + KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT, + // Generic OpenAI Keys GenericOpenAiBasePath: process.env.GENERIC_OPEN_AI_BASE_PATH, GenericOpenAiModelPref: process.env.GENERIC_OPEN_AI_MODEL_PREF, GenericOpenAiTokenLimit: 
process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT, GenericOpenAiKey: !!process.env.GENERIC_OPEN_AI_API_KEY, + + // Cohere API Keys + CohereApiKey: !!process.env.COHERE_API_KEY, + CohereModelPref: process.env.COHERE_MODEL_PREF, }; }, }; diff --git a/server/package.json b/server/package.json index 5549ba713..cb5ed8141 100644 --- a/server/package.json +++ b/server/package.json @@ -41,6 +41,7 @@ "chalk": "^4", "check-disk-space": "^3.4.0", "chromadb": "^1.5.2", + "cohere-ai": "^7.9.5", "cors": "^2.8.5", "dotenv": "^16.0.3", "express": "^4.18.2", diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js index 6a8ad3c42..d5ee1f9d3 100644 --- a/server/utils/AiProviders/anthropic/index.js +++ b/server/utils/AiProviders/anthropic/index.js @@ -1,5 +1,4 @@ const { v4 } = require("uuid"); -const { chatPrompt } = require("../../chats"); const { writeResponseChunk, clientAbortedHandler, @@ -33,7 +32,7 @@ class AnthropicLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -110,31 +109,6 @@ class AnthropicLLM { } } - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.isValidChatCompletionModel(this.model)) - throw new Error( - `Anthropic chat: ${this.model} is not valid for chat completion!` - ); - - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - - const streamRequest = await this.anthropic.messages.stream({ - model: this.model, - max_tokens: 4096, - system: messages[0].content, // Strip out the system message - messages: messages.slice(1), // Pop off the system message - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - }); - return streamRequest; - } - async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { if (!this.isValidChatCompletionModel(this.model)) throw new Error( diff --git a/server/utils/AiProviders/azureOpenAi/index.js b/server/utils/AiProviders/azureOpenAi/index.js index 21fc5cd91..a2ab556db 100644 --- a/server/utils/AiProviders/azureOpenAi/index.js +++ b/server/utils/AiProviders/azureOpenAi/index.js @@ -1,5 +1,4 @@ const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi"); -const { chatPrompt } = require("../../chats"); const { writeResponseChunk, clientAbortedHandler, @@ -45,7 +44,7 @@ class AzureOpenAiLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Sure the user selected a proper value for the token limit @@ -82,66 +81,6 @@ class AzureOpenAiLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.model) - throw new Error( - "No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5." - ); - - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - const textResponse = await this.openai - .getChatCompletions(this.model, messages, { - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - }) - .then((res) => { - if (!res.hasOwnProperty("choices")) - throw new Error("AzureOpenAI chat: No results!"); - if (res.choices.length === 0) - throw new Error("AzureOpenAI chat: No results length!"); - return res.choices[0].message.content; - }) - .catch((error) => { - console.log(error); - throw new Error( - `AzureOpenAI::getChatCompletions failed with: ${error.message}` - ); - }); - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.model) - throw new Error( - "No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5." - ); - - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - const stream = await this.openai.streamChatCompletions( - this.model, - messages, - { - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - } - ); - return stream; - } - async getChatCompletion(messages = [], { temperature = 0.7 }) { if (!this.model) throw new Error( diff --git a/server/utils/AiProviders/cohere/index.js b/server/utils/AiProviders/cohere/index.js new file mode 100644 index 000000000..a97a15fca --- /dev/null +++ b/server/utils/AiProviders/cohere/index.js @@ -0,0 +1,226 @@ +const { v4 } = require("uuid"); +const { writeResponseChunk } = require("../../helpers/chat/responses"); +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); + +class CohereLLM { + constructor(embedder = null) { + const { CohereClient } = require("cohere-ai"); + if (!process.env.COHERE_API_KEY) + throw new Error("No Cohere API key was set."); + + const cohere = new CohereClient({ + token: process.env.COHERE_API_KEY, + }); + + this.cohere = cohere; + this.model = process.env.COHERE_MODEL_PREF; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + this.embedder = !!embedder ? 
embedder : new NativeEmbedder(); + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + #convertChatHistoryCohere(chatHistory = []) { + let cohereHistory = []; + chatHistory.forEach((message) => { + switch (message.role) { + case "system": + cohereHistory.push({ role: "SYSTEM", message: message.content }); + break; + case "user": + cohereHistory.push({ role: "USER", message: message.content }); + break; + case "assistant": + cohereHistory.push({ role: "CHATBOT", message: message.content }); + break; + } + }); + + return cohereHistory; + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + promptWindowLimit() { + switch (this.model) { + case "command-r": + return 128_000; + case "command-r-plus": + return 128_000; + case "command": + return 4_096; + case "command-light": + return 4_096; + case "command-nightly": + return 8_192; + case "command-light-nightly": + return 8_192; + default: + return 4_096; + } + } + + async isValidChatCompletionModel(model = "") { + const validModels = [ + "command-r", + "command-r-plus", + "command", + "command-light", + "command-nightly", + "command-light-nightly", + ]; + return validModels.includes(model); + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [prompt, ...chatHistory, { role: "user", content: userPrompt }]; + } + + async isSafe(_input = "") { + // Not implemented so must be stubbed + return { safe: true, reasons: [] }; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `Cohere chat: ${this.model} is not valid for chat completion!` + ); + + const message = messages[messages.length - 1].content; // Get the last message + const cohereHistory = this.#convertChatHistoryCohere(messages.slice(0, -1)); // Remove the last message and convert to Cohere + + const chat = await this.cohere.chat({ + model: this.model, + message: message, + chatHistory: cohereHistory, + temperature, + }); + + if (!chat.hasOwnProperty("text")) return null; + return chat.text; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `Cohere chat: ${this.model} is not valid for chat completion!` + ); + + const message = messages[messages.length - 1].content; // Get the last message + const cohereHistory = this.#convertChatHistoryCohere(messages.slice(0, -1)); // Remove the last message and convert to Cohere + + const stream = await this.cohere.chatStream({ + model: this.model, + message: message, + chatHistory: cohereHistory, + temperature, + }); + + return { type: "stream", stream: stream }; + } + + async handleStream(response, stream, responseProps) { + return new Promise(async (resolve) => { + let fullText = ""; + const { uuid = v4(), sources = [] } = responseProps; + + const handleAbort = () => { + writeResponseChunk(response, { + uuid, + sources, + type: "abort", + textResponse: fullText, + close: true, + error: false, + }); + response.removeListener("close", handleAbort); + resolve(fullText); + }; + response.on("close", handleAbort); + + try { + for await (const chat of 
stream.stream) { + if (chat.eventType === "text-generation") { + const text = chat.text; + fullText += text; + + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: text, + close: false, + error: false, + }); + } + } + + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: "", + close: true, + error: false, + }); + response.removeListener("close", handleAbort); + resolve(fullText); + } catch (error) { + writeResponseChunk(response, { + uuid, + sources, + type: "abort", + textResponse: null, + close: true, + error: error.message, + }); + response.removeListener("close", handleAbort); + resolve(fullText); + } + }); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +module.exports = { + CohereLLM, +}; diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js index 354c1899e..b9eb26c3c 100644 --- a/server/utils/AiProviders/gemini/index.js +++ b/server/utils/AiProviders/gemini/index.js @@ -1,4 +1,3 @@ -const { chatPrompt } = require("../../chats"); const { writeResponseChunk, clientAbortedHandler, @@ -48,7 +47,7 @@ class GeminiLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -118,32 +117,6 @@ class GeminiLLM { return allMessages; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.isValidChatCompletionModel(this.model)) - throw new Error( - `Gemini chat: ${this.model} is not valid for chat completion!` - ); - - const compressedHistory = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - chatHistory, - }, - rawHistory - ); - - const chatThread = this.gemini.startChat({ - history: this.formatMessages(compressedHistory), - }); - const result = await chatThread.sendMessage(prompt); - const response = result.response; - const responseText = response.text(); - - if (!responseText) throw new Error("Gemini: No response could be parsed."); - - return responseText; - } - async getChatCompletion(messages = [], _opts = {}) { if (!this.isValidChatCompletionModel(this.model)) throw new Error( @@ -165,30 +138,6 @@ class GeminiLLM { return responseText; } - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.isValidChatCompletionModel(this.model)) - throw new Error( - `Gemini chat: ${this.model} is not valid for chat completion!` - ); - - const compressedHistory = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - chatHistory, - }, - rawHistory - ); - - const chatThread = this.gemini.startChat({ - history: this.formatMessages(compressedHistory), - }); - const responseStream = await chatThread.sendMessageStream(prompt); - if (!responseStream.stream) - throw new Error("Could not stream response stream from Gemini."); - - return responseStream.stream; - } - async streamGetChatCompletion(messages = [], _opts = {}) { if 
(!this.isValidChatCompletionModel(this.model)) throw new Error( diff --git a/server/utils/AiProviders/genericOpenAi/index.js b/server/utils/AiProviders/genericOpenAi/index.js index cf293c3e7..8c171b679 100644 --- a/server/utils/AiProviders/genericOpenAi/index.js +++ b/server/utils/AiProviders/genericOpenAi/index.js @@ -1,5 +1,4 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native"); -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -53,7 +52,7 @@ class GenericOpenAiLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Ensure the user set a value for the token limit @@ -89,55 +88,6 @@ class GenericOpenAiLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("GenericOpenAI chat: No results!"); - if (result.choices.length === 0) - throw new Error("GenericOpenAI chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `GenericOpenAI::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { const result = await this.openai.chat.completions .create({ diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js index add064af4..01d92f006 100644 --- a/server/utils/AiProviders/groq/index.js +++ b/server/utils/AiProviders/groq/index.js @@ -1,5 +1,4 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native"); -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -38,7 +37,7 @@ class GroqLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -91,65 +90,6 @@ class GroqLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `Groq chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("GroqAI chat: No results!"); - if (result.choices.length === 0) - throw new Error("GroqAI chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `GroqAI::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `GroqAI:streamChat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( diff --git a/server/utils/AiProviders/huggingface/index.js b/server/utils/AiProviders/huggingface/index.js index 22f9c2fd4..6a79880c8 100644 --- a/server/utils/AiProviders/huggingface/index.js +++ b/server/utils/AiProviders/huggingface/index.js @@ -1,6 +1,5 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native"); const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi"); -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -48,7 +47,7 @@ class HuggingFaceLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -90,55 +89,6 @@ class HuggingFaceLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("HuggingFace chat: No results!"); - if (result.choices.length === 0) - throw new Error("HuggingFace chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `HuggingFace::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { const result = await this.openai.createChatCompletion({ model: this.model, diff --git a/server/utils/AiProviders/koboldCPP/index.js b/server/utils/AiProviders/koboldCPP/index.js new file mode 100644 index 000000000..4b1ff3f61 --- /dev/null +++ b/server/utils/AiProviders/koboldCPP/index.js @@ -0,0 +1,180 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + clientAbortedHandler, + writeResponseChunk, +} = require("../../helpers/chat/responses"); +const { v4: uuidv4 } = require("uuid"); + +class KoboldCPPLLM { + constructor(embedder = null, modelPreference = null) { + const { OpenAI: OpenAIApi } = require("openai"); + if (!process.env.KOBOLD_CPP_BASE_PATH) + throw new Error( + "KoboldCPP must have a valid base path to use for the api." + ); + + this.basePath = process.env.KOBOLD_CPP_BASE_PATH; + this.openai = new OpenAIApi({ + baseURL: this.basePath, + apiKey: null, + }); + this.model = modelPreference ?? process.env.KOBOLD_CPP_MODEL_PREF ?? null; + if (!this.model) throw new Error("KoboldCPP must have a valid model set."); + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + if (!embedder) + console.warn( + "No embedding provider defined for KoboldCPPLLM - falling back to NativeEmbedder for embedding!" + ); + this.embedder = !embedder ? new NativeEmbedder() : embedder; + this.defaultTemp = 0.7; + this.log(`Inference API: ${this.basePath} Model: ${this.model}`); + } + + log(text, ...args) { + console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args); + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + // Ensure the user set a value for the token limit + // and if undefined - assume 4096 window. 
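+  // KOBOLD_CPP_MODEL_TOKEN_LIMIT arrives as a string from the environment, so it is validated with isNaN and coerced via Number() below.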
+ promptWindowLimit() { + const limit = process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT || 4096; + if (!limit || isNaN(Number(limit))) + throw new Error("No token context limit was set."); + return Number(limit); + } + + // Short circuit since we have no idea if the model is valid or not + // in pre-flight for generic endpoints + isValidChatCompletionModel(_modelName = "") { + return true; + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [prompt, ...chatHistory, { role: "user", content: userPrompt }]; + } + + async isSafe(_input = "") { + // Not implemented so must be stubbed + return { safe: true, reasons: [] }; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + const result = await this.openai.chat.completions + .create({ + model: this.model, + messages, + temperature, + }) + .catch((e) => { + throw new Error(e.response.data.error.message); + }); + + if (!result.hasOwnProperty("choices") || result.choices.length === 0) + return null; + return result.choices[0].message.content; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + const streamRequest = await this.openai.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }); + return streamRequest; + } + + handleStream(response, stream, responseProps) { + const { uuid = uuidv4(), sources = [] } = responseProps; + + // Custom handler for KoboldCPP stream responses + return new Promise(async (resolve) => { + let fullText = ""; + const handleAbort = () => clientAbortedHandler(resolve, fullText); + response.on("close", handleAbort); + + for await (const chunk of stream) { + const message = chunk?.choices?.[0]; + const token = message?.delta?.content; + + if (token) { + fullText += token; + writeResponseChunk(response, { + uuid, + sources: [], + type: "textResponseChunk", + textResponse: token, + close: false, + error: false, + }); + } + + // KoboldCPP finishes with "length" or "stop" + if ( + message.finish_reason !== "null" && + (message.finish_reason === "length" || + message.finish_reason === "stop") + ) { + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: "", + close: true, + error: false, + }); + response.removeListener("close", handleAbort); + resolve(fullText); + } + } + }); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +module.exports = { + KoboldCPPLLM, +}; diff --git a/server/utils/AiProviders/lmStudio/index.js b/server/utils/AiProviders/lmStudio/index.js index 98cbbcaa5..48f689fbc 100644 --- a/server/utils/AiProviders/lmStudio/index.js +++ b/server/utils/AiProviders/lmStudio/index.js @@ -1,4 +1,3 @@ -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -49,7 +48,7 @@ class LMStudioLLM { } streamingEnabled() { - return "streamChat" in this && 
"streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Ensure the user set a value for the token limit @@ -85,65 +84,6 @@ class LMStudioLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.model) - throw new Error( - `LMStudio chat: ${this.model} is not valid or defined for chat completion!` - ); - - const textResponse = await this.lmstudio.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("LMStudio chat: No results!"); - if (result.choices.length === 0) - throw new Error("LMStudio chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `LMStudio::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!this.model) - throw new Error( - `LMStudio chat: ${this.model} is not valid or defined for chat completion!` - ); - - const streamRequest = await this.lmstudio.chat.completions.create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - stream: true, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!this.model) throw new Error( diff --git a/server/utils/AiProviders/localAi/index.js b/server/utils/AiProviders/localAi/index.js index 4a8921af8..504775285 100644 --- a/server/utils/AiProviders/localAi/index.js +++ b/server/utils/AiProviders/localAi/index.js @@ -1,4 +1,3 @@ -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -41,7 +40,7 @@ class LocalAiLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Ensure the user set a value for the token limit @@ -75,65 +74,6 @@ class LocalAiLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `LocalAI chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("LocalAI chat: No results!"); - if (result.choices.length === 0) - throw new Error("LocalAI chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `LocalAI::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `LocalAI chat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( diff --git a/server/utils/AiProviders/mistral/index.js b/server/utils/AiProviders/mistral/index.js index 7b60f3fed..8410d4cb6 100644 --- a/server/utils/AiProviders/mistral/index.js +++ b/server/utils/AiProviders/mistral/index.js @@ -1,4 +1,3 @@ -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -42,7 +41,7 @@ class MistralLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -70,64 +69,6 @@ class MistralLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `Mistral chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("Mistral chat: No results!"); - if (result.choices.length === 0) - throw new Error("Mistral chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `Mistral::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `Mistral chat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( diff --git a/server/utils/AiProviders/native/index.js b/server/utils/AiProviders/native/index.js index 07d8918cf..e13b68a2f 100644 --- a/server/utils/AiProviders/native/index.js +++ b/server/utils/AiProviders/native/index.js @@ -1,7 +1,6 @@ const fs = require("fs"); const path = require("path"); const { NativeEmbedder } = require("../../EmbeddingEngines/native"); -const { chatPrompt } = require("../../chats"); const { writeResponseChunk, clientAbortedHandler, @@ -94,7 +93,7 @@ class NativeLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Ensure the user set a value for the token limit @@ -123,45 +122,6 @@ class NativeLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - try { - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - - const model = await this.#llamaClient({ - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - }); - const response = await model.call(messages); - return response.content; - } catch (error) { - throw new Error( - `NativeLLM::createChatCompletion failed with: ${error.message}` - ); - } - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const model = await this.#llamaClient({ - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - }); - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - const responseStream = await model.stream(messages); - return responseStream; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { const model = await this.#llamaClient({ temperature }); const response = await model.call(messages); diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js index a19315254..73269d6d2 100644 --- a/server/utils/AiProviders/ollama/index.js +++ b/server/utils/AiProviders/ollama/index.js @@ -1,4 +1,3 @@ -const { chatPrompt } = require("../../chats"); const { StringOutputParser } = require("@langchain/core/output_parsers"); const { writeResponseChunk, @@ -74,7 +73,7 @@ class OllamaAILLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Ensure the user set a value for the token limit @@ -108,53 +107,6 @@ class OllamaAILLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - - const model = this.#ollamaClient({ - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - }); - const textResponse = await model - .pipe(new StringOutputParser()) - .invoke(this.#convertToLangchainPrototypes(messages)) - .catch((e) => { - throw new Error( - `Ollama::getChatCompletion failed to communicate with Ollama. 
${e.message}` - ); - }); - - if (!textResponse || !textResponse.length) - throw new Error(`Ollama::sendChat text response was empty.`); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - const messages = await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ); - - const model = this.#ollamaClient({ - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - }); - const stream = await model - .pipe(new StringOutputParser()) - .stream(this.#convertToLangchainPrototypes(messages)); - return stream; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { const model = this.#ollamaClient({ temperature }); const textResponse = await model diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js index d69ec11ee..3a4d997ce 100644 --- a/server/utils/AiProviders/openAi/index.js +++ b/server/utils/AiProviders/openAi/index.js @@ -1,5 +1,4 @@ const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi"); -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -41,7 +40,7 @@ class OpenAiLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -122,65 +121,6 @@ class OpenAiLLM { return { safe: false, reasons }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `OpenAI chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("OpenAI chat: No results!"); - if (result.choices.length === 0) - throw new Error("OpenAI chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `OpenAI::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `OpenAI chat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( diff --git a/server/utils/AiProviders/openRouter/index.js b/server/utils/AiProviders/openRouter/index.js index 8fb078fbc..a83010835 100644 --- a/server/utils/AiProviders/openRouter/index.js +++ b/server/utils/AiProviders/openRouter/index.js @@ -1,10 +1,8 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native"); -const { chatPrompt } = require("../../chats"); const { v4: uuidv4 } = require("uuid"); const { writeResponseChunk, clientAbortedHandler, - handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); const fs = require("fs"); const path = require("path"); @@ -99,7 +97,7 @@ class OpenRouterLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -131,65 +129,6 @@ class OpenRouterLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `OpenRouter chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("OpenRouter chat: No results!"); - if (result.choices.length === 0) - throw new Error("OpenRouter chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `OpenRouter::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `OpenRouter chat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( @@ -304,143 +243,6 @@ class OpenRouterLLM { }); } - // handleStream(response, stream, responseProps) { - // const timeoutThresholdMs = 500; - // const { uuid = uuidv4(), sources = [] } = responseProps; - - // return new Promise((resolve) => { - // let fullText = ""; - // let chunk = ""; - // let lastChunkTime = null; // null when first token is still not received. - - // // Establish listener to early-abort a streaming response - // // in case things go sideways or the user does not like the response. - // // We preserve the generated text but continue as if chat was completed - // // to preserve previously generated content. 
- // const handleAbort = () => clientAbortedHandler(resolve, fullText); - // response.on("close", handleAbort); - - // // NOTICE: Not all OpenRouter models will return a stop reason - // // which keeps the connection open and so the model never finalizes the stream - // // like the traditional OpenAI response schema does. So in the case the response stream - // // never reaches a formal close state we maintain an interval timer that if we go >=timeoutThresholdMs with - // // no new chunks then we kill the stream and assume it to be complete. OpenRouter is quite fast - // // so this threshold should permit most responses, but we can adjust `timeoutThresholdMs` if - // // we find it is too aggressive. - // const timeoutCheck = setInterval(() => { - // if (lastChunkTime === null) return; - - // const now = Number(new Date()); - // const diffMs = now - lastChunkTime; - // if (diffMs >= timeoutThresholdMs) { - // console.log( - // `OpenRouter stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.` - // ); - // writeResponseChunk(response, { - // uuid, - // sources, - // type: "textResponseChunk", - // textResponse: "", - // close: true, - // error: false, - // }); - // clearInterval(timeoutCheck); - // response.removeListener("close", handleAbort); - // resolve(fullText); - // } - // }, 500); - - // stream.data.on("data", (data) => { - // const lines = data - // ?.toString() - // ?.split("\n") - // .filter((line) => line.trim() !== ""); - - // for (const line of lines) { - // let validJSON = false; - // const message = chunk + line.replace(/^data: /, ""); - - // // JSON chunk is incomplete and has not ended yet - // // so we need to stitch it together. You would think JSON - // // chunks would only come complete - but they don't! - // try { - // JSON.parse(message); - // validJSON = true; - // } catch { } - - // if (!validJSON) { - // // It can be possible that the chunk decoding is running away - // // and the message chunk fails to append due to string length. - // // In this case abort the chunk and reset so we can continue. 
- // // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416 - // try { - // chunk += message; - // } catch (e) { - // console.error(`Chunk appending error`, e); - // chunk = ""; - // } - // continue; - // } else { - // chunk = ""; - // } - - // if (message == "[DONE]") { - // lastChunkTime = Number(new Date()); - // writeResponseChunk(response, { - // uuid, - // sources, - // type: "textResponseChunk", - // textResponse: "", - // close: true, - // error: false, - // }); - // clearInterval(timeoutCheck); - // response.removeListener("close", handleAbort); - // resolve(fullText); - // } else { - // let finishReason = null; - // let token = ""; - // try { - // const json = JSON.parse(message); - // token = json?.choices?.[0]?.delta?.content; - // finishReason = json?.choices?.[0]?.finish_reason || null; - // } catch { - // continue; - // } - - // if (token) { - // fullText += token; - // lastChunkTime = Number(new Date()); - // writeResponseChunk(response, { - // uuid, - // sources: [], - // type: "textResponseChunk", - // textResponse: token, - // close: false, - // error: false, - // }); - // } - - // if (finishReason !== null) { - // lastChunkTime = Number(new Date()); - // writeResponseChunk(response, { - // uuid, - // sources, - // type: "textResponseChunk", - // textResponse: "", - // close: true, - // error: false, - // }); - // clearInterval(timeoutCheck); - // response.removeListener("close", handleAbort); - // resolve(fullText); - // } - // } - // } - // }); - // }); - // } - // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations async embedTextInput(textInput) { return await this.embedder.embedTextInput(textInput); diff --git a/server/utils/AiProviders/perplexity/index.js b/server/utils/AiProviders/perplexity/index.js index 71b74e9e3..a17ec43f5 100644 --- a/server/utils/AiProviders/perplexity/index.js +++ b/server/utils/AiProviders/perplexity/index.js @@ -1,5 +1,4 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native"); -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -50,7 +49,7 @@ class PerplexityLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } promptWindowLimit() { @@ -81,65 +80,6 @@ class PerplexityLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `Perplexity chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("Perplexity chat: No results!"); - if (result.choices.length === 0) - throw new Error("Perplexity chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `Perplexity::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `Perplexity chat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( diff --git a/server/utils/AiProviders/togetherAi/index.js b/server/utils/AiProviders/togetherAi/index.js index 6d91e9a98..577a4b742 100644 --- a/server/utils/AiProviders/togetherAi/index.js +++ b/server/utils/AiProviders/togetherAi/index.js @@ -1,4 +1,3 @@ -const { chatPrompt } = require("../../chats"); const { handleDefaultStreamResponseV2, } = require("../../helpers/chat/responses"); @@ -49,7 +48,7 @@ class TogetherAiLLM { } streamingEnabled() { - return "streamChat" in this && "streamGetChatCompletion" in this; + return "streamGetChatCompletion" in this; } // Ensure the user set a value for the token limit @@ -82,65 +81,6 @@ class TogetherAiLLM { return { safe: true, reasons: [] }; } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `Together AI chat: ${this.model} is not valid for chat completion!` - ); - - const textResponse = await this.openai.chat.completions - .create({ - model: this.model, - temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }) - .then((result) => { - if (!result.hasOwnProperty("choices")) - throw new Error("Together AI chat: No results!"); - if (result.choices.length === 0) - throw new Error("Together AI chat: No results length!"); - return result.choices[0].message.content; - }) - .catch((error) => { - throw new Error( - `TogetherAI::createChatCompletion failed with: ${error.message}` - ); - }); - - return textResponse; - } - - async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { - if (!(await this.isValidChatCompletionModel(this.model))) - throw new Error( - `TogetherAI chat: ${this.model} is not valid for chat completion!` - ); - - const streamRequest = await this.openai.chat.completions.create({ - model: this.model, - stream: true, - temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), - n: 1, - messages: await this.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: prompt, - chatHistory, - }, - rawHistory - ), - }); - return streamRequest; - } - async getChatCompletion(messages = null, { temperature = 0.7 }) { if (!(await this.isValidChatCompletionModel(this.model))) throw new Error( diff --git a/server/utils/EmbeddingEngines/cohere/index.js b/server/utils/EmbeddingEngines/cohere/index.js new file mode 100644 index 000000000..0dfb61d0d --- /dev/null +++ b/server/utils/EmbeddingEngines/cohere/index.js @@ -0,0 +1,86 @@ +const { toChunks } = require("../../helpers"); + +class CohereEmbedder { + constructor() { + if (!process.env.COHERE_API_KEY) + throw new Error("No Cohere API key was set."); + + const { CohereClient } = require("cohere-ai"); + const cohere = new CohereClient({ + token: process.env.COHERE_API_KEY, + }); + + this.cohere = cohere; + this.model = process.env.EMBEDDING_MODEL_PREF || "embed-english-v3.0"; + this.inputType = "search_document"; + + // Limit of how many strings we can process in a single pass to stay with resource or network limits + this.maxConcurrentChunks = 96; // Cohere's limit per request is 96 + this.embeddingMaxChunkLength = 1945; // https://docs.cohere.com/docs/embed-2 - assume a token is roughly 4 letters with some padding + } + + async embedTextInput(textInput) { + this.inputType = "search_query"; + const result = await this.embedChunks([textInput]); + return result?.[0] || []; + } + + async embedChunks(textChunks = []) { + const embeddingRequests = []; + this.inputType = "search_document"; + + for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) { + embeddingRequests.push( + new Promise((resolve) => { + this.cohere + .embed({ + texts: chunk, + model: this.model, + inputType: this.inputType, + }) + .then((res) => { + resolve({ data: res.embeddings, error: null }); + }) + .catch((e) => { + e.type = + e?.response?.data?.error?.code || + e?.response?.status || + "failed_to_embed"; + e.message = e?.response?.data?.error?.message || e.message; + resolve({ data: [], error: e }); + }); + }) + ); + } + + const { data = [], error = null } = await Promise.all( + embeddingRequests + ).then((results) => { + const errors = results + .filter((res) => !!res.error) + .map((res) => res.error) + .flat(); + + if (errors.length > 0) { + let uniqueErrors = new Set(); + errors.map((error) => + uniqueErrors.add(`[${error.type}]: ${error.message}`) + ); + return { data: [], error: Array.from(uniqueErrors).join(", ") }; + } + + return { + data: results.map((res) => res?.data || []).flat(), + error: null, + }; + }); + + if (!!error) throw new Error(`Cohere Failed to embed: ${error}`); + + return data.length > 0 ? data : null; + } +} + +module.exports = { + CohereEmbedder, +}; diff --git a/server/utils/EmbeddingEngines/native/index.js b/server/utils/EmbeddingEngines/native/index.js index ae73c4896..5494c8869 100644 --- a/server/utils/EmbeddingEngines/native/index.js +++ b/server/utils/EmbeddingEngines/native/index.js @@ -107,14 +107,21 @@ class NativeEmbedder { ); let fetchResponse = await this.#fetchWithHost(); - if (fetchResponse.pipeline !== null) return fetchResponse.pipeline; + if (fetchResponse.pipeline !== null) { + this.modelDownloaded = true; + return fetchResponse.pipeline; + } this.log( `Failed to download model from primary URL. 
Using fallback ${fetchResponse.retry}` ); if (!!fetchResponse.retry) fetchResponse = await this.#fetchWithHost(fetchResponse.retry); - if (fetchResponse.pipeline !== null) return fetchResponse.pipeline; + if (fetchResponse.pipeline !== null) { + this.modelDownloaded = true; + return fetchResponse.pipeline; + } + throw fetchResponse.error; } diff --git a/server/utils/chats/commands/reset.js b/server/utils/chats/commands/reset.js index a23eef7aa..f2bd4562c 100644 --- a/server/utils/chats/commands/reset.js +++ b/server/utils/chats/commands/reset.js @@ -23,6 +23,7 @@ async function resetMemory( sources: [], close: true, error: false, + action: "reset_chat", }; } diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js index 533ea0c34..98b096fb1 100644 --- a/server/utils/chats/embed.js +++ b/server/utils/chats/embed.js @@ -131,7 +131,11 @@ async function streamChatWithForEmbed( // If in query mode and no sources are found, do not // let the LLM try to hallucinate a response or use general knowledge - if (chatMode === "query" && sources.length === 0) { + if ( + chatMode === "query" && + sources.length === 0 && + pinnedDocIdentifiers.length === 0 + ) { writeResponseChunk(response, { id: uuid, type: "textResponse", diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js index 38ce6c9bd..76f98e0df 100644 --- a/server/utils/chats/index.js +++ b/server/utils/chats/index.js @@ -140,9 +140,13 @@ async function chatWithWorkspace( contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts]; sources = [...sources, ...vectorSearchResults.sources]; - // If in query mode and no sources are found, do not + // If in query mode and no sources are found from the vector search and no pinned documents, do not // let the LLM try to hallucinate a response or use general knowledge and exit early - if (chatMode === "query" && sources.length === 0) { + if ( + chatMode === "query" && + vectorSearchResults.sources.length === 0 && + pinnedDocIdentifiers.length === 0 + ) { return { id: uuid, type: "textResponse", diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js index 57f326664..ba4dea163 100644 --- a/server/utils/chats/stream.js +++ b/server/utils/chats/stream.js @@ -160,9 +160,13 @@ async function streamChatWithWorkspace( contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts]; sources = [...sources, ...vectorSearchResults.sources]; - // If in query mode and no sources are found, do not + // If in query mode and no sources are found from the vector search and no pinned documents, do not // let the LLM try to hallucinate a response or use general knowledge and exit early - if (chatMode === "query" && sources.length === 0) { + if ( + chatMode === "query" && + sources.length === 0 && + pinnedDocIdentifiers.length === 0 + ) { writeResponseChunk(response, { id: uuid, type: "textResponse", diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index 1bb54170a..ce690ae47 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -14,6 +14,7 @@ const SUPPORT_CUSTOM_MODELS = [ "perplexity", "openrouter", "lmstudio", + "koboldcpp", ]; async function getCustomModels(provider = "", apiKey = null, basePath = null) { @@ -39,6 +40,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getOpenRouterModels(); case "lmstudio": return await getLMStudioModels(basePath); + case "koboldcpp": + return await getKoboldCPPModels(basePath); default: return { 
models: [], error: "Invalid provider for custom models" }; } @@ -171,6 +174,28 @@ async function getLMStudioModels(basePath = null) { } } +async function getKoboldCPPModels(basePath = null) { + try { + const { OpenAI: OpenAIApi } = require("openai"); + const openai = new OpenAIApi({ + baseURL: basePath || process.env.LMSTUDIO_BASE_PATH, + apiKey: null, + }); + const models = await openai.models + .list() + .then((results) => results.data) + .catch((e) => { + console.error(`KoboldCPP:listModels`, e.message); + return []; + }); + + return { models, error: null }; + } catch (e) { + console.error(`KoboldCPP:getKoboldCPPModels`, e.message); + return { models: [], error: "Could not fetch KoboldCPP Models" }; + } +} + async function ollamaAIModels(basePath = null) { let url; try { diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index c8cdd870f..ba65e3dfb 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -77,6 +77,12 @@ function getLLMProvider({ provider = null, model = null } = {}) { case "groq": const { GroqLLM } = require("../AiProviders/groq"); return new GroqLLM(embedder, model); + case "koboldcpp": + const { KoboldCPPLLM } = require("../AiProviders/koboldCPP"); + return new KoboldCPPLLM(embedder, model); + case "cohere": + const { CohereLLM } = require("../AiProviders/cohere"); + return new CohereLLM(embedder, model); case "generic-openai": const { GenericOpenAiLLM } = require("../AiProviders/genericOpenAi"); return new GenericOpenAiLLM(embedder, model); @@ -110,6 +116,9 @@ function getEmbeddingEngineSelection() { case "lmstudio": const { LMStudioEmbedder } = require("../EmbeddingEngines/lmstudio"); return new LMStudioEmbedder(); + case "cohere": + const { CohereEmbedder } = require("../EmbeddingEngines/cohere"); + return new CohereEmbedder(); default: return null; } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index ed6e5c13c..4d5e4bdd1 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -132,6 +132,20 @@ const KEY_MAPPING = { checks: [nonZero], }, + // KoboldCPP Settings + KoboldCPPBasePath: { + envKey: "KOBOLD_CPP_BASE_PATH", + checks: [isNotEmpty, isValidURL], + }, + KoboldCPPModelPref: { + envKey: "KOBOLD_CPP_MODEL_PREF", + checks: [isNotEmpty], + }, + KoboldCPPTokenLimit: { + envKey: "KOBOLD_CPP_MODEL_TOKEN_LIMIT", + checks: [nonZero], + }, + // Generic OpenAI InferenceSettings GenericOpenAiBasePath: { envKey: "GENERIC_OPEN_AI_BASE_PATH", @@ -290,6 +304,16 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // Cohere Options + CohereApiKey: { + envKey: "COHERE_API_KEY", + checks: [isNotEmpty], + }, + CohereModelPref: { + envKey: "COHERE_MODEL_PREF", + checks: [isNotEmpty], + }, + // Whisper (transcription) providers WhisperProvider: { envKey: "WHISPER_PROVIDER", @@ -393,6 +417,8 @@ function supportedLLM(input = "") { "perplexity", "openrouter", "groq", + "koboldcpp", + "cohere", "generic-openai", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; @@ -434,6 +460,7 @@ function supportedEmbeddingModel(input = "") { "native", "ollama", "lmstudio", + "cohere", ]; return supported.includes(input) ? 
null diff --git a/server/yarn.lock b/server/yarn.lock index 49c202af6..e7ee9051f 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -1817,6 +1817,17 @@ cmake-js@^7.2.1: which "^2.0.2" yargs "^17.7.2" +cohere-ai@^7.9.5: + version "7.9.5" + resolved "https://registry.yarnpkg.com/cohere-ai/-/cohere-ai-7.9.5.tgz#05a592fe19decb8692d1b19d93ac835d7f816b8b" + integrity sha512-tr8LUR3Q46agFpfEwaYwzYO4qAuN0/R/8YroG4bc86LadOacBAabctZUq0zfCdLiL7gB4yWJs4QCzfpRH3rQuw== + dependencies: + form-data "4.0.0" + js-base64 "3.7.2" + node-fetch "2.7.0" + qs "6.11.2" + url-join "4.0.1" + color-convert@^1.9.3: version "1.9.3" resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" @@ -2846,19 +2857,19 @@ form-data-encoder@1.7.2: resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040" integrity sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A== -form-data@^3.0.0: - version "3.0.1" - resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" - integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg== +form-data@4.0.0, form-data@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452" + integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww== dependencies: asynckit "^0.4.0" combined-stream "^1.0.8" mime-types "^2.1.12" -form-data@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452" - integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww== +form-data@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" + integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg== dependencies: asynckit "^0.4.0" combined-stream "^1.0.8" @@ -3652,6 +3663,11 @@ joi@^17.11.0: "@sideway/formula" "^3.0.1" "@sideway/pinpoint" "^2.0.0" +js-base64@3.7.2: + version "3.7.2" + resolved "https://registry.yarnpkg.com/js-base64/-/js-base64-3.7.2.tgz#816d11d81a8aff241603d19ce5761e13e41d7745" + integrity sha512-NnRs6dsyqUXejqk/yv2aiXlAvOs56sLkX6nUdeaNezI5LFFLlsZjOThmwnrcwh5ZZRwZlCMnVAY3CvhIhoVEKQ== + js-tiktoken@^1.0.11, js-tiktoken@^1.0.7, js-tiktoken@^1.0.8: version "1.0.11" resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.11.tgz#d7d707b849f703841112660d9d55169424a35344" @@ -4324,7 +4340,7 @@ node-domexception@1.0.0: resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== -node-fetch@^2.6.1, node-fetch@^2.6.12, node-fetch@^2.6.7, node-fetch@^2.6.9: +node-fetch@2.7.0, node-fetch@^2.6.1, node-fetch@^2.6.12, node-fetch@^2.6.7, node-fetch@^2.6.9: version "2.7.0" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== @@ -4947,6 +4963,13 @@ qs@6.11.0: dependencies: side-channel "^1.0.4" +qs@6.11.2: + version 
"6.11.2" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.11.2.tgz#64bea51f12c1f5da1bc01496f48ffcff7c69d7d9" + integrity sha512-tDNIz22aBzCDxLtVH++VnTfzxlfeK5CbqohpSqpJgj1Wg/cQbStNAz3NuqCs5vV+pjBsK4x4pN9HlVh7rcYRiA== + dependencies: + side-channel "^1.0.4" + qs@^6.7.0: version "6.12.1" resolved "https://registry.yarnpkg.com/qs/-/qs-6.12.1.tgz#39422111ca7cbdb70425541cba20c7d7b216599a" @@ -5862,7 +5885,7 @@ uri-js@^4.2.2, uri-js@^4.4.1: dependencies: punycode "^2.1.0" -url-join@^4.0.1: +url-join@4.0.1, url-join@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/url-join/-/url-join-4.0.1.tgz#b642e21a2646808ffa178c4c5fda39844e12cde7" integrity sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==