Merge branch 'master' into vertex

Timothy Carambat 2024-11-04 11:52:42 -08:00 committed by GitHub
commit 452a47a636
33 changed files with 841 additions and 38 deletions

View File

@ -33,6 +33,7 @@
"Mintplex",
"mixtral",
"moderations",
"novita",
"numpages",
"Ollama",
"Oobabooga",

View File

@ -97,6 +97,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
- [Apipie](https://apipie.ai/)
- [xAI](https://x.ai/)
- [Google Vertex](https://cloud.google.com/vertex-ai)
- [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
**Embedder models:**

View File

@ -90,6 +90,10 @@ GID='1000'
# LITE_LLM_BASE_PATH='http://127.0.0.1:4000'
# LITE_LLM_API_KEY='sk-123abc'
# LLM_PROVIDER='novita'
# NOVITA_LLM_API_KEY='your-novita-api-key-here' # Get yours at https://novita.ai/settings#key-management
# NOVITA_LLM_MODEL_PREF='gryphe/mythomax-l2-13b'
# LLM_PROVIDER='cohere'
# COHERE_API_KEY=
# COHERE_MODEL_PREF='command-r'
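
For reference, the full Novita block a user might uncomment looks roughly like this (the API key is a placeholder; NOVITA_LLM_TIMEOUT_MS is the optional stream-timeout variable wired up elsewhere in this commit):

LLM_PROVIDER='novita'
NOVITA_LLM_API_KEY='your-novita-api-key-here'
NOVITA_LLM_MODEL_PREF='gryphe/mythomax-l2-13b'
# Optional: stream inactivity timeout in ms; values below 500 are clamped to 500.
NOVITA_LLM_TIMEOUT_MS=500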

View File

@ -0,0 +1,142 @@
import System from "@/models/system";
import { CaretDown, CaretUp } from "@phosphor-icons/react";
import { useState, useEffect } from "react";
export default function NovitaLLMOptions({ settings }) {
return (
<div className="flex flex-col gap-y-4 mt-1.5">
<div className="flex gap-[36px]">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Novita API Key
</label>
<input
type="password"
name="NovitaLLMApiKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Novita API Key"
defaultValue={settings?.NovitaLLMApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
{!settings?.credentialsOnly && (
<NovitaModelSelection settings={settings} />
)}
</div>
<AdvancedControls settings={settings} />
</div>
);
}
function AdvancedControls({ settings }) {
const [showAdvancedControls, setShowAdvancedControls] = useState(false);
return (
<div className="flex flex-col gap-y-4">
<button
type="button"
onClick={() => setShowAdvancedControls(!showAdvancedControls)}
className="text-white hover:text-white/70 flex items-center text-sm"
>
{showAdvancedControls ? "Hide" : "Show"} advanced controls
{showAdvancedControls ? (
<CaretUp size={14} className="ml-1" />
) : (
<CaretDown size={14} className="ml-1" />
)}
</button>
<div hidden={!showAdvancedControls}>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Stream Timeout (ms)
</label>
<input
type="number"
name="NovitaLLMTimeout"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Timeout value between token responses to auto-timeout the stream"
defaultValue={settings?.NovitaLLMTimeout ?? 500}
autoComplete="off"
onScroll={(e) => e.target.blur()}
min={500}
step={1}
/>
</div>
</div>
</div>
);
}
function NovitaModelSelection({ settings }) {
const [groupedModels, setGroupedModels] = useState({});
const [loading, setLoading] = useState(true);
useEffect(() => {
async function findCustomModels() {
setLoading(true);
const { models } = await System.customModels("novita");
if (models?.length > 0) {
const modelsByOrganization = models.reduce((acc, model) => {
acc[model.organization] = acc[model.organization] || [];
acc[model.organization].push(model);
return acc;
}, {});
setGroupedModels(modelsByOrganization);
}
setLoading(false);
}
findCustomModels();
}, []);
if (loading || Object.keys(groupedModels).length === 0) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Chat Model Selection
</label>
<select
name="NovitaLLMModelPref"
disabled={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
-- loading available models --
</option>
</select>
</div>
);
}
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Chat Model Selection
</label>
<select
name="NovitaLLMModelPref"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{Object.keys(groupedModels)
.sort()
.map((organization) => (
<optgroup key={organization} label={organization}>
{groupedModels[organization].map((model) => (
<option
key={model.id}
value={model.id}
selected={settings?.NovitaLLMModelPref === model.id}
>
{model.name}
</option>
))}
</optgroup>
))}
</select>
</div>
);
}

View File

@ -71,7 +71,13 @@ function groupModels(models) {
}, {});
}
const groupedProviders = ["togetherai", "fireworksai", "openai", "openrouter"];
const groupedProviders = [
"togetherai",
"fireworksai",
"openai",
"novita",
"openrouter",
];
export default function useGetProviderModels(provider = null) {
const [defaultModels, setDefaultModels] = useState([]);
const [customModels, setCustomModels] = useState([]);

Binary file not shown (new image added, 38 KiB)

View File

@ -12,6 +12,7 @@ import AnthropicLogo from "@/media/llmprovider/anthropic.png";
import GeminiLogo from "@/media/llmprovider/gemini.png";
import VertexLogo from "@/media/llmprovider/vertex.png";
import OllamaLogo from "@/media/llmprovider/ollama.png";
import NovitaLogo from "@/media/llmprovider/novita.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
import LocalAiLogo from "@/media/llmprovider/localai.png";
import TogetherAILogo from "@/media/llmprovider/togetherai.png";
@ -41,6 +42,7 @@ import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions";
import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
import VertexLLMOptions from "@/components/LLMSelection/VertexLLMOptions";
import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions";
import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
import FireworksAiOptions from "@/components/LLMSelection/FireworksAiOptions";
import MistralOptions from "@/components/LLMSelection/MistralOptions";
@ -123,6 +125,15 @@ export const AVAILABLE_LLM_PROVIDERS = [
description: "Run LLMs locally on your own machine.",
requiredConfig: ["OllamaLLMBasePath"],
},
{
name: "Novita AI",
value: "novita",
logo: NovitaLogo,
options: (settings) => <NovitaLLMOptions settings={settings} />,
description:
"Reliable, scalable, and cost-effective LLMs from Novita AI.",
requiredConfig: ["NovitaLLMApiKey"],
},
{
name: "LM Studio",
value: "lmstudio",

View File

@ -16,6 +16,7 @@ import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import NovitaLogo from "@/media/llmprovider/novita.png";
import GroqLogo from "@/media/llmprovider/groq.png";
import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
import TextGenWebUILogo from "@/media/llmprovider/text-generation-webui.png";
@ -158,6 +159,14 @@ export const LLM_SELECTION_PRIVACY = {
],
logo: OpenRouterLogo,
},
novita: {
name: "Novita AI",
description: [
"Your chats will not be used for training",
"Your prompts and document text used in response creation are visible to Novita AI",
],
logo: NovitaLogo,
},
groq: {
name: "Groq",
description: [

View File

@ -23,6 +23,7 @@ import LiteLLMLogo from "@/media/llmprovider/litellm.png";
import AWSBedrockLogo from "@/media/llmprovider/bedrock.png";
import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
import APIPieLogo from "@/media/llmprovider/apipie.png";
import NovitaLogo from "@/media/llmprovider/novita.png";
import XAILogo from "@/media/llmprovider/xai.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
@ -50,6 +51,7 @@ import LiteLLMOptions from "@/components/LLMSelection/LiteLLMOptions";
import AWSBedrockLLMOptions from "@/components/LLMSelection/AwsBedrockLLMOptions";
import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions";
import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
@ -113,6 +115,14 @@ const LLMS = [
options: (settings) => <OllamaLLMOptions settings={settings} />,
description: "Run LLMs locally on your own machine.",
},
{
name: "Novita AI",
value: "novita",
logo: NovitaLogo,
options: (settings) => <NovitaLLMOptions settings={settings} />,
description:
"Reliable, scalable, and cost-effective LLMs from Novita AI.",
},
{
name: "LM Studio",
value: "lmstudio",

View File

@ -17,6 +17,7 @@ const ENABLED_PROVIDERS = [
"koboldcpp",
"togetherai",
"openrouter",
"novita",
"mistral",
"perplexity",
"textgenwebui",
@ -40,6 +41,7 @@ const WARN_PERFORMANCE = [
"ollama",
"localai",
"openrouter",
"novita",
"generic-openai",
"textgenwebui",
];

View File

@ -85,6 +85,7 @@ AnythingLLMのいくつかのクールな機能
- [Fireworks AI (チャットモデル)](https://fireworks.ai/)
- [Perplexity (チャットモデル)](https://www.perplexity.ai/)
- [OpenRouter (チャットモデル)](https://openrouter.ai/)
- [Novita AI (チャットモデル)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
- [Mistral](https://mistral.ai/)
- [Groq](https://groq.com/)
- [Cohere](https://cohere.com/)

View File

@ -81,6 +81,7 @@ AnythingLLM的一些酷炫特性
- [Fireworks AI (聊天模型)](https://fireworks.ai/)
- [Perplexity (聊天模型)](https://www.perplexity.ai/)
- [OpenRouter (聊天模型)](https://openrouter.ai/)
- [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
- [Mistral](https://mistral.ai/)
- [Groq](https://groq.com/)
- [Cohere](https://cohere.com/)

View File

@ -91,6 +91,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# LITE_LLM_BASE_PATH='http://127.0.0.1:4000'
# LITE_LLM_API_KEY='sk-123abc'
# LLM_PROVIDER='novita'
# NOVITA_LLM_API_KEY='your-novita-api-key-here' # Get yours at https://novita.ai/settings#key-management
# NOVITA_LLM_MODEL_PREF='gryphe/mythomax-l2-13b'
# LLM_PROVIDER='cohere'
# COHERE_API_KEY=
# COHERE_MODEL_PREF='command-r'

View File

@ -456,6 +456,11 @@ const SystemSettings = {
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",
// Novita LLM Keys
NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
NovitaLLMModelPref: process.env.NOVITA_LLM_MODEL_PREF,
NovitaLLMTimeout: process.env.NOVITA_LLM_TIMEOUT_MS,
// TogetherAI Keys
TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
TogetherAiModelPref: process.env.TOGETHER_AI_MODEL_PREF,

View File

@ -2,4 +2,5 @@ Xenova
downloaded/*
!downloaded/.placeholder
openrouter
apipie
apipie
novita

View File

@ -0,0 +1,376 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { v4: uuidv4 } = require("uuid");
const {
writeResponseChunk,
clientAbortedHandler,
} = require("../../helpers/chat/responses");
const fs = require("fs");
const path = require("path");
const { safeJsonParse } = require("../../http");
const cacheFolder = path.resolve(
process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, "models", "novita")
: path.resolve(__dirname, `../../../storage/models/novita`)
);
class NovitaLLM {
constructor(embedder = null, modelPreference = null) {
if (!process.env.NOVITA_LLM_API_KEY)
throw new Error("No Novita API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.basePath = "https://api.novita.ai/v3/openai";
this.openai = new OpenAIApi({
baseURL: this.basePath,
apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
defaultHeaders: {
"HTTP-Referer": "https://anythingllm.com",
"X-Novita-Source": "anythingllm",
},
});
this.model =
modelPreference ||
process.env.NOVITA_LLM_MODEL_PREF ||
"gryphe/mythomax-l2-13b";
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.timeout = this.#parseTimeout();
if (!fs.existsSync(cacheFolder))
fs.mkdirSync(cacheFolder, { recursive: true });
this.cacheModelPath = path.resolve(cacheFolder, "models.json");
this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
this.log(`Loaded with model: ${this.model}`);
}
log(text, ...args) {
console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
}
/**
* Some Novita models never return a `finish_reason` and thus leave the stream open,
* which causes issues in subsequent messages. This timeout forces us to close the stream after
* x milliseconds of inactivity. It is configurable via the NOVITA_LLM_TIMEOUT_MS environment variable.
* @returns {number} The timeout value in milliseconds (default: 500)
*/
#parseTimeout() {
if (isNaN(Number(process.env.NOVITA_LLM_TIMEOUT_MS))) return 500;
const setValue = Number(process.env.NOVITA_LLM_TIMEOUT_MS);
if (setValue < 500) return 500;
return setValue;
}
// This checks if the .cached_at file has a timestamp that is more than 1 week (in ms) older
// than the current date. If it is, we refetch from the API so that all the models are up
// to date.
#cacheIsStale() {
const MAX_STALE = 6.048e8; // 1 Week in MS
if (!fs.existsSync(this.cacheAtPath)) return true;
const now = Number(new Date());
const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
return now - timestampMs > MAX_STALE;
}
// The Novita model API has a lot of models, so we cache them locally in this directory;
// if the cached JSON file is stale or does not exist, we fetch from the API and store it.
// This might slow down the first request, but we need the proper token context window
// for each model, and since this is a constructor property we can really only get it if this cache exists.
// We used to handle this as a chore, but given there is an API to get the info, that makes little sense.
async #syncModels() {
if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
return false;
this.log("Model cache is not present or stale. Fetching from Novita API.");
await fetchNovitaModels();
return;
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
models() {
if (!fs.existsSync(this.cacheModelPath)) return {};
return safeJsonParse(
fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
{}
);
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
static promptWindowLimit(modelName) {
const cacheModelPath = path.resolve(cacheFolder, "models.json");
const availableModels = fs.existsSync(cacheModelPath)
? safeJsonParse(
fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
{}
)
: {};
return availableModels[modelName]?.maxLength || 4096;
}
promptWindowLimit() {
const availableModels = this.models();
return availableModels[this.model]?.maxLength || 4096;
}
async isValidChatCompletionModel(model = "") {
await this.#syncModels();
const availableModels = this.models();
return availableModels.hasOwnProperty(model);
}
/**
* Generates appropriate content array for a message + attachments.
* @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
* @returns {string|object[]}
*/
#generateContent({ userPrompt, attachments = [] }) {
if (!attachments.length) {
return userPrompt;
}
const content = [{ type: "text", text: userPrompt }];
for (let attachment of attachments) {
content.push({
type: "image_url",
image_url: {
url: attachment.contentString,
detail: "auto",
},
});
}
return content.flat();
}
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
attachments = [],
}) {
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
return [
prompt,
...chatHistory,
{
role: "user",
content: this.#generateContent({ userPrompt, attachments }),
},
];
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Novita chat: ${this.model} is not valid for chat completion!`
);
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
})
.catch((e) => {
throw new Error(e.message);
});
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
if (!(await this.isValidChatCompletionModel(this.model)))
throw new Error(
`Novita chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
const timeoutThresholdMs = this.timeout;
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise(async (resolve) => {
let fullText = "";
let lastChunkTime = null; // null when first token is still not received.
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
// NOTICE: Not all Novita models will return a stop reason,
// which keeps the connection open so the model never finalizes the stream
// like the traditional OpenAI response schema does. In case the response stream
// never reaches a formal close state, we maintain an interval timer: if we go >= timeoutThresholdMs
// with no new chunks, we kill the stream and assume it to be complete. Novita is quite fast,
// so this threshold should permit most responses, but `timeoutThresholdMs` can be adjusted
// if we find it is too aggressive.
const timeoutCheck = setInterval(() => {
if (lastChunkTime === null) return;
const now = Number(new Date());
const diffMs = now - lastChunkTime;
if (diffMs >= timeoutThresholdMs) {
this.log(
`Novita stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
);
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}, 500);
try {
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
lastChunkTime = Number(new Date());
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (message.finish_reason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
} catch (e) {
writeResponseChunk(response, {
uuid,
sources,
type: "abort",
textResponse: null,
close: true,
error: e.message,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
});
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
async function fetchNovitaModels() {
return await fetch(`https://api.novita.ai/v3/openai/models`, {
method: "GET",
headers: {
"Content-Type": "application/json",
},
})
.then((res) => res.json())
.then(({ data = [] }) => {
const models = {};
data.forEach((model) => {
models[model.id] = {
id: model.id,
name: model.title,
organization:
model.id.split("/")[0].charAt(0).toUpperCase() +
model.id.split("/")[0].slice(1),
maxLength: model.context_size,
};
});
// Cache all response information
if (!fs.existsSync(cacheFolder))
fs.mkdirSync(cacheFolder, { recursive: true });
fs.writeFileSync(
path.resolve(cacheFolder, "models.json"),
JSON.stringify(models),
{
encoding: "utf-8",
}
);
fs.writeFileSync(
path.resolve(cacheFolder, ".cached_at"),
String(Number(new Date())),
{
encoding: "utf-8",
}
);
return models;
})
.catch((e) => {
console.error(e);
return {};
});
}
module.exports = {
NovitaLLM,
fetchNovitaModels,
};
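
As a quick orientation to the interface above, a minimal usage sketch (not part of this diff; the call site, require path, and prompt values are illustrative):

// Hypothetical usage -- assumes NOVITA_LLM_API_KEY is set in the environment.
const { NovitaLLM } = require("../AiProviders/novita");

async function demo() {
  const llm = new NovitaLLM(null, "gryphe/mythomax-l2-13b");
  const messages = llm.constructPrompt({
    systemPrompt: "You are a helpful assistant.",
    contextTexts: [],
    chatHistory: [],
    userPrompt: "Hello!",
  });
  // Non-streaming completion; resolves to the assistant reply text or null.
  const reply = await llm.getChatCompletion(messages, { temperature: llm.defaultTemp });
  console.log(reply);
}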

View File

@ -1,3 +1,18 @@
/**
* @typedef {object} DocumentMetadata
* @property {string} id - eg; "123e4567-e89b-12d3-a456-426614174000"
* @property {string} url - eg; "file://example.com/index.html"
* @property {string} title - eg; "example.com/index.html"
* @property {string} docAuthor - eg; "no author found"
* @property {string} description - eg; "No description found."
* @property {string} docSource - eg; "URL link uploaded by the user."
* @property {string} chunkSource - eg; link://https://example.com
* @property {string} published - ISO 8601 date string
* @property {number} wordCount - Number of words in the document
* @property {string} pageContent - The raw text content of the document
* @property {number} token_count_estimate - Number of tokens in the document
*/
function isNullOrNaN(value) {
if (value === null) return true;
return isNaN(value);
@ -29,10 +44,12 @@ class TextSplitter {
console.log(`\x1b[35m[TextSplitter]\x1b[0m ${text}`, ...args);
}
// Does a quick check to determine the text chunk length limit.
// Embedder models have hard-set limits that cannot be exceeded, just like an LLM context
// so here we want to allow override of the default 1000, but up to the models maximum, which is
// sometimes user defined.
/**
* Does a quick check to determine the text chunk length limit.
* Embedder models have hard-set limits that cannot be exceeded, just like an LLM context
* so here we want to allow override of the default 1000, but up to the models maximum, which is
* sometimes user defined.
*/
static determineMaxChunkSize(preferred = null, embedderLimit = 1000) {
const prefValue = isNullOrNaN(preferred)
? Number(embedderLimit)
@ -45,6 +62,70 @@ class TextSplitter {
return prefValue > limit ? limit : prefValue;
}
/**
* Builds an object of metadata to be prepended to each chunk.
* @param {DocumentMetadata} metadata - Metadata of the source document.
* @returns {{[key: ('sourceDocument' | 'published' | 'source')]: string}} Object of metadata that will be prepended to each chunk.
*/
static buildHeaderMeta(metadata = {}) {
if (!metadata || Object.keys(metadata).length === 0) return null;
const PLUCK_MAP = {
title: {
as: "sourceDocument",
pluck: (metadata) => {
return metadata?.title || null;
},
},
published: {
as: "published",
pluck: (metadata) => {
return metadata?.published || null;
},
},
chunkSource: {
as: "source",
pluck: (metadata) => {
const validPrefixes = ["link://", "youtube://"];
// If the chunkSource is a link or youtube link, we can add the URL
// as its source in the metadata so the LLM can use it for context.
// eg prompt: Where did you get this information? -> answer: "from https://example.com"
if (
!metadata?.chunkSource || // Exists
!metadata?.chunkSource.length || // Is not empty
typeof metadata.chunkSource !== "string" || // Is a string
!validPrefixes.some(
(prefix) => metadata.chunkSource.startsWith(prefix) // Has a valid prefix we respect
)
)
return null;
// We know a prefix is present, so we can split on it and return the rest.
// If nothing is found, return null and it will not be added to the metadata.
let source = null;
for (const prefix of validPrefixes) {
source = metadata.chunkSource.split(prefix)?.[1] || null;
if (source) break;
}
return source;
},
},
};
const pluckedData = {};
Object.entries(PLUCK_MAP).forEach(([key, value]) => {
if (!(key in metadata)) return; // Skip if the metadata key is not present.
const pluckedValue = value.pluck(metadata);
if (!pluckedValue) return; // Skip if the plucked value is null/empty.
pluckedData[value.as] = pluckedValue;
});
return pluckedData;
}
/**
* Creates a string of metadata to be prepended to each chunk.
*/
stringifyHeader() {
if (!this.config.chunkHeaderMeta) return null;
let content = "";
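
To make the vector-DB changes below easier to follow, here is an illustrative sketch of what `TextSplitter.buildHeaderMeta` returns for a typical link-sourced document (the metadata values are made up for the example):

// Illustrative input/output only -- values are placeholders.
const metadata = {
  title: "example.com/index.html",
  published: "2024-11-04T00:00:00.000Z",
  chunkSource: "link://https://example.com",
};
const header = TextSplitter.buildHeaderMeta(metadata);
// header => {
//   sourceDocument: "example.com/index.html",
//   published: "2024-11-04T00:00:00.000Z",
//   source: "https://example.com",
// }
// Each vector DB below now passes this object as `chunkHeaderMeta` instead of building it inline.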

View File

@ -791,6 +791,8 @@ ${this.getHistory({ to: route.to })
return new Providers.ApiPieProvider({ model: config.model });
case "xai":
return new Providers.XAIProvider({ model: config.model });
case "novita":
return new Providers.NovitaProvider({ model: config.model });
default:
throw new Error(

View File

@ -206,6 +206,14 @@ class Provider {
apiKey: process.env.LITE_LLM_API_KEY ?? null,
...config,
});
case "novita":
return new ChatOpenAI({
configuration: {
baseURL: "https://api.novita.ai/v3/openai",
},
apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
...config,
});
default:
throw new Error(`Unsupported provider ${provider} for this task.`);

View File

@ -18,6 +18,7 @@ const DeepSeekProvider = require("./deepseek.js");
const LiteLLMProvider = require("./litellm.js");
const ApiPieProvider = require("./apipie.js");
const XAIProvider = require("./xai.js");
const NovitaProvider = require("./novita.js");
module.exports = {
OpenAIProvider,
@ -40,4 +41,5 @@ module.exports = {
LiteLLMProvider,
ApiPieProvider,
XAIProvider,
NovitaProvider,
};

View File

@ -0,0 +1,115 @@
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
/**
* The agent provider for the Novita AI provider.
*/
class NovitaProvider extends InheritMultiple([Provider, UnTooled]) {
model;
constructor(config = {}) {
const { model = "gryphe/mythomax-l2-13b" } = config;
super();
const client = new OpenAI({
baseURL: "https://api.novita.ai/v3/openai",
apiKey: process.env.NOVITA_LLM_API_KEY,
maxRetries: 3,
defaultHeaders: {
"HTTP-Referer": "https://anythingllm.com",
"X-Novita-Source": "anythingllm",
},
});
this._client = client;
this.model = model;
this.verbose = true;
}
get client() {
return this._client;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
model: this.model,
temperature: 0,
messages,
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Novita chat: No results!");
if (result.choices.length === 0)
throw new Error("Novita chat: No results length!");
return result.choices[0].message.content;
})
.catch((_) => {
return null;
});
}
/**
* Create a completion based on the received messages.
*
* @param messages A list of messages to send to the API.
* @param functions
* @returns The completion.
*/
async complete(messages, functions = []) {
let completion;
if (functions.length > 0) {
const { toolCall, text } = await this.functionCall(
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
if (toolCall !== null) {
this.providerLog(`Valid tool call found - running ${toolCall.name}.`);
this.deduplicator.trackRun(toolCall.name, toolCall.arguments);
return {
result: null,
functionCall: {
name: toolCall.name,
arguments: toolCall.arguments,
},
cost: 0,
};
}
completion = { content: text };
}
if (!completion?.content) {
this.providerLog("Will assume chat completion without tool call inputs.");
const response = await this.client.chat.completions.create({
model: this.model,
messages: this.cleanMsgs(messages),
});
completion = response.choices[0].message;
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
// from calling the exact same function over and over in a loop within a single chat exchange
// _but_ we should enable it to call previously used tools in a new chat interaction.
this.deduplicator.reset("runs");
return {
result: completion.content,
cost: 0,
};
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since Novita AI has no cost basis.
*/
getCost() {
return 0;
}
}
module.exports = NovitaProvider;
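
A rough sketch of how the agent handler might invoke this provider (the message list is illustrative, and passing an empty function list simply skips the tool-call path):

// Hypothetical call site -- not part of this diff.
const NovitaProvider = require("./novita.js");

async function run() {
  const provider = new NovitaProvider({ model: "gryphe/mythomax-l2-13b" });
  // With no functions supplied, complete() falls through to a plain chat completion.
  const { result, cost } = await provider.complete(
    [{ role: "user", content: "Summarize the last meeting." }],
    []
  );
  console.log(result, cost); // cost is always 0 for Novita.
}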

View File

@ -173,6 +173,10 @@ class AgentHandler {
if (!process.env.XAI_LLM_API_KEY)
throw new Error("xAI API Key must be provided to use agents.");
break;
case "novita":
if (!process.env.NOVITA_LLM_API_KEY)
throw new Error("Novita API Key must be provided to use agents.");
break;
default:
throw new Error(
@ -236,6 +240,8 @@ class AgentHandler {
return process.env.APIPIE_LLM_MODEL_PREF ?? null;
case "xai":
return process.env.XAI_LLM_MODEL_PREF ?? "grok-beta";
case "novita":
return process.env.NOVITA_LLM_MODEL_PREF ?? "gryphe/mythomax-l2-13b";
default:
return null;
}

View File

@ -4,6 +4,7 @@ const { perplexityModels } = require("../AiProviders/perplexity");
const { togetherAiModels } = require("../AiProviders/togetherAi");
const { fireworksAiModels } = require("../AiProviders/fireworksAi");
const { ElevenLabsTTS } = require("../TextToSpeech/elevenLabs");
const { fetchNovitaModels } = require("../AiProviders/novita");
const SUPPORT_CUSTOM_MODELS = [
"openai",
"localai",
@ -21,6 +22,7 @@ const SUPPORT_CUSTOM_MODELS = [
"groq",
"deepseek",
"apipie",
"novita",
"xai",
];
@ -61,6 +63,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return await getDeepSeekModels(apiKey);
case "apipie":
return await getAPIPieModels(apiKey);
case "novita":
return await getNovitaModels();
case "xai":
return await getXAIModels(apiKey);
default:
@ -362,6 +366,20 @@ async function getOpenRouterModels() {
return { models, error: null };
}
async function getNovitaModels() {
const knownModels = await fetchNovitaModels();
if (Object.keys(knownModels).length === 0)
return { models: [], error: null };
const models = Object.values(knownModels).map((model) => {
return {
id: model.id,
organization: model.organization,
name: model.name,
};
});
return { models, error: null };
}
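
For context, the resolved shape looks roughly like this (the model entry is illustrative; real values come from the cached Novita model list):

// { models: [{ id: "gryphe/mythomax-l2-13b", organization: "Gryphe", name: "MythoMax L2 13B" }, ...],
//   error: null }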
async function getAPIPieModels(apiKey = null) {
const knownModels = await fetchApiPieModels(apiKey);
if (!Object.keys(knownModels).length === 0)

View File

@ -168,6 +168,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "apipie":
const { ApiPieLLM } = require("../AiProviders/apipie");
return new ApiPieLLM(embedder, model);
case "novita":
const { NovitaLLM } = require("../AiProviders/novita");
return new NovitaLLM(embedder, model);
case "xai":
const { XAiLLM } = require("../AiProviders/xai");
return new XAiLLM(embedder, model);
@ -303,6 +306,9 @@ function getLLMProviderClass({ provider = null } = {}) {
case "apipie":
const { ApiPieLLM } = require("../AiProviders/apipie");
return ApiPieLLM;
case "novita":
const { NovitaLLM } = require("../AiProviders/novita");
return NovitaLLM;
case "xai":
const { XAiLLM } = require("../AiProviders/xai");
return XAiLLM;

View File

@ -412,6 +412,20 @@ const KEY_MAPPING = {
checks: [],
},
// Novita Options
NovitaLLMApiKey: {
envKey: "NOVITA_LLM_API_KEY",
checks: [isNotEmpty],
},
NovitaLLMModelPref: {
envKey: "NOVITA_LLM_MODEL_PREF",
checks: [isNotEmpty],
},
NovitaLLMTimeout: {
envKey: "NOVITA_LLM_TIMEOUT_MS",
checks: [],
},
// Groq Options
GroqApiKey: {
envKey: "GROQ_API_KEY",
@ -673,6 +687,7 @@ function supportedLLM(input = "") {
"huggingface",
"perplexity",
"openrouter",
"novita",
"groq",
"koboldcpp",
"textgenwebui",

View File

@ -160,10 +160,7 @@ const AstraDB = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -251,10 +251,7 @@ const Chroma = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -240,10 +240,7 @@ const LanceDb = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -203,10 +203,7 @@ const Milvus = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -146,10 +146,7 @@ const PineconeDB = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -222,10 +222,7 @@ const QDrant = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -262,10 +262,7 @@ const Weaviate = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);

View File

@ -196,10 +196,7 @@ const Zilliz = {
{ label: "text_splitter_chunk_overlap" },
20
),
chunkHeaderMeta: {
sourceDocument: metadata?.title,
published: metadata?.published || "unknown",
},
chunkHeaderMeta: TextSplitter.buildHeaderMeta(metadata),
});
const textChunks = await textSplitter.splitText(pageContent);