add Together AI LLM support (#560)

* add Together AI LLM support * update readme to support together ai * Patch togetherAI implementation * add model sorting/option labels by organization for model selection * linting + add data handling for TogetherAI * change truthy statement patch validLLMSelection method --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
2024-07-02 15:20:37 +02:00 · 2024-01-10 12:35:30 -08:00 · 2024-01-10 12:35:30 -08:00 · 1d39b8a2ce
commit 1d39b8a2ce
parent 8cd3a92c66
18 changed files with 809 additions and 14 deletions
--- a/README.md
+++ b/README.md
@ -1,4 +1,5 @@
 <a name="readme-top"></a>
+
 <p align="center">
  <a href="https://useanything.com"><img src="https://github.com/Mintplex-Labs/anything-llm/blob/master/images/wordmark.png?raw=true" alt="AnythingLLM logo"></a>
 </p>
@ -38,13 +39,14 @@ A full-stack application that enables you to turn any document, resource, or pie

 </details>

-
 ### Product Overview
+
 AnythingLLM is a full-stack application where you can use commercial off-the-shelf LLMs or popular open source LLMs and vectorDB solutions to build a private ChatGPT with no compromises that you can run locally as well as host remotely and be able to chat intelligently with any documents you provide it.

 AnythingLLM divides your documents into objects called `workspaces`. A Workspace functions a lot like a thread, but with the addition of containerization of your documents. Workspaces can share documents, but they do not talk to each other so you can keep your context for each workspace clean.

 Some cool features of AnythingLLM
+
 - **Multi-user instance support and permissioning**
 - Multiple document type support (PDF, TXT, DOCX, etc)
 - Manage documents in your vector database from a simple UI
@ -57,7 +59,9 @@ Some cool features of AnythingLLM
 - Full Developer API for custom integrations!

 ### Supported LLMs, Embedders, and Vector Databases
+
 **Supported LLMs:**
+
 - [Any open-source llama.cpp compatible model](/server/storage/models/README.md#text-generation-llm-selection)
 - [OpenAI](https://openai.com)
 - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
@ -66,8 +70,10 @@ Some cool features of AnythingLLM
 - [Ollama (chat models)](https://ollama.ai/)
 - [LM Studio (all models)](https://lmstudio.ai)
 - [LocalAi (all models)](https://localai.io/)
+- [Together AI (chat models)](https://www.together.ai/)

 **Supported Embedding models:**
+
 - [AnythingLLM Native Embedder](/server/storage/models/README.md) (default)
 - [OpenAI](https://openai.com)
 - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
@ -75,42 +81,43 @@ Some cool features of AnythingLLM
 - [LocalAi (all)](https://localai.io/)

 **Supported Vector Databases:**
+
 - [LanceDB](https://github.com/lancedb/lancedb) (default)
 - [Pinecone](https://pinecone.io)
 - [Chroma](https://trychroma.com)
 - [Weaviate](https://weaviate.io)
 - [QDrant](https://qdrant.tech)

-
 ### Technical Overview
+
 This monorepo consists of three main sections:
+
 - `frontend`: A viteJS + React frontend that you can run to easily create and manage all your content the LLM can use.
 - `server`: A NodeJS express server to handle all the interactions and do all the vectorDB management and LLM interactions.
 - `docker`: Docker instructions and build process + information for building from source.
 - `collector`: NodeJS express server that processes and parses documents from the UI.

 ## 🛳 Self Hosting
-Mintplex Labs & the community maintain a number of deployment methods, scripts, and templates that you can use to run AnythingLLM locally. Refer to the table below to read how to deploy on your preferred environment or to automatically deploy.
-| Docker                                 | AWS | GCP | Digital Ocean | Render.com |
-|----------------------------------------|----:|-----|---------------|------------|
-| [![Deploy on Docker][docker-btn]][docker-deploy] |  [![Deploy on AWS][aws-btn]][aws-deploy] | [![Deploy on GCP][gcp-btn]][gcp-deploy]  | [![Deploy on DigitalOcean][do-btn]][aws-deploy]  |  [![Deploy on Render.com][render-btn]][render-deploy] |

+Mintplex Labs & the community maintain a number of deployment methods, scripts, and templates that you can use to run AnythingLLM locally. Refer to the table below to read how to deploy on your preferred environment or to automatically deploy.
+| Docker | AWS | GCP | Digital Ocean | Render.com |
+|----------------------------------------|----:|-----|---------------|------------|
+| [![Deploy on Docker][docker-btn]][docker-deploy] | [![Deploy on AWS][aws-btn]][aws-deploy] | [![Deploy on GCP][gcp-btn]][gcp-deploy] | [![Deploy on DigitalOcean][do-btn]][aws-deploy] | [![Deploy on Render.com][render-btn]][render-deploy] |

 ## How to setup for development
+
 - `yarn setup` To fill in the required `.env` files you'll need in each of the application sections (from root of repo).
  - Go fill those out before proceeding. Ensure `server/.env.development` is filled or else things won't work right.
 - `yarn dev:server` To boot the server locally (from root of repo).
 - `yarn dev:frontend` To boot the frontend locally (from root of repo).
 - `yarn dev:collector` To then run the document collector (from root of repo).

-
-
-
 [Learn about documents](./server/storage/documents/DOCUMENTS.md)

 [Learn about vector caching](./server/storage/vector-cache/VECTOR_CACHE.md)

 ## Contributing
+
 - create issue
 - create PR with branch name format of `<issue number>-<short name>`
 - yee haw let's merge
@ -119,12 +126,15 @@ Mintplex Labs & the community maintain a number of deployment methods, scripts,
 <summary><kbd>Telemetry for AnythingLLM</kbd></summary>

 ## Telemetry
+
 AnythingLLM by Mintplex Labs Inc contains a telemetry feature that collects anonymous usage information.

 ### Why?
+
 We use this information to help us understand how AnythingLLM is used, to help us prioritize work on new features and bug fixes, and to help us improve AnythingLLM's performance and stability.

 ### Opting out
+
 Set `DISABLE_TELEMETRY` in your server or docker .env settings to "true" to opt out of telemetry.

 ```
@ -132,7 +142,9 @@ DISABLE_TELEMETRY="true"
 ```

 ### What do you explicitly track?
+
 We will only track usage details that help us make product and roadmap decisions, specifically:
+
 - Version of your installation
 - When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use.
 - Type of vector database in use. Lets us know which vector database provider is the most used to prioritize changes when updates arrive for that provider.
@ -160,6 +172,7 @@ Copyright © 2023 [Mintplex Labs][profile-link]. <br />
 This project is [MIT](./LICENSE) licensed.

 <!-- LINK GROUP -->
+
 [back-to-top]: https://img.shields.io/badge/-BACK_TO_TOP-222628?style=flat-square
 [profile-link]: https://github.com/mintplex-labs
 [vector-admin]: https://github.com/mintplex-labs/vector-admin
--- a/docker/.env.example
+++ b/docker/.env.example
@ -40,6 +40,10 @@ GID='1000'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096

+# LLM_PROVIDER='togetherai'
+# TOGETHER_AI_API_KEY='my-together-ai-key'
+# TOGETHER_AI_MODEL_PREF='mistralai/Mixtral-8x7B-Instruct-v0.1'
+
 ###########################################
 ######## Embedding API SELECTION ##########
 ###########################################
--- a/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx
@ -0,0 +1,95 @@
+import System from "@/models/system";
+import { useState, useEffect } from "react";
+
+export default function TogetherAiOptions({ settings }) {
+  return (
+    <div className="flex gap-x-4">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Together AI API Key
+        </label>
+        <input
+          type="password"
+          name="TogetherAiApiKey"
+          className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="Together AI API Key"
+          defaultValue={settings?.TogetherAiApiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+        />
+      </div>
+      <TogetherAiModelSelection settings={settings} />
+    </div>
+  );
+}
+function TogetherAiModelSelection({ settings }) {
+  const [groupedModels, setGroupedModels] = useState({});
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      setLoading(true);
+      const { models } = await System.customModels("togetherai");
+
+      if (models?.length > 0) {
+        const modelsByOrganization = models.reduce((acc, model) => {
+          acc[model.organization] = acc[model.organization] || [];
+          acc[model.organization].push(model);
+          return acc;
+        }, {});
+
+        setGroupedModels(modelsByOrganization);
+      }
+
+      setLoading(false);
+    }
+    findCustomModels();
+  }, []);
+
+  if (loading || Object.keys(groupedModels).length === 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Chat Model Selection
+        </label>
+        <select
+          name="TogetherAiModelPref"
+          disabled={true}
+          className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            -- loading available models --
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-4">
+        Chat Model Selection
+      </label>
+      <select
+        name="TogetherAiModelPref"
+        required={true}
+        className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {Object.entries(groupedModels).map(([organization, models]) => (
+          <optgroup key={organization} label={organization}>
+            {models.map((model) => (
+              <option
+                key={model.id}
+                value={model.id}
+                selected={settings.TogetherAiModelPref === model.id}
+              >
+                {model.name}
+              </option>
+            ))}
+          </optgroup>
+        ))}
+      </select>
+    </div>
+  );
+}
--- a/frontend/src/media/llmprovider/togetherai.png
+++ b/frontend/src/media/llmprovider/togetherai.png
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@ -11,6 +11,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
+import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@ -22,6 +23,7 @@ import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { MagnifyingGlass } from "@phosphor-icons/react";
+import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";

 export default function GeneralLLMPreference() {
  const [saving, setSaving] = useState(false);
@ -127,6 +129,13 @@ export default function GeneralLLMPreference() {
      options: <LocalAiOptions settings={settings} />,
      description: "Run LLMs locally on your own machine.",
    },
+    {
+      name: "Together AI",
+      value: "togetherai",
+      logo: TogetherAILogo,
+      options: <TogetherAiOptions settings={settings} />,
+      description: "Run open source models from Together AI.",
+    },
    {
      name: "Native",
      value: "native",
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@ -6,6 +6,7 @@ import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
 import AnthropicLogo from "@/media/llmprovider/anthropic.png";
 import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
+import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import ChromaLogo from "@/media/vectordbs/chroma.png";
@ -25,7 +26,7 @@ const LLM_SELECTION_PRIVACY = {
    name: "OpenAI",
    description: [
      "Your chats will not be used for training",
-      "Your prompts and document text used in responses are visible to OpenAI",
+      "Your prompts and document text used in response creation are visible to OpenAI",
    ],
    logo: OpenAiLogo,
  },
@ -41,7 +42,7 @@ const LLM_SELECTION_PRIVACY = {
    name: "Anthropic",
    description: [
      "Your chats will not be used for training",
-      "Your prompts and document text used in responses are visible to Anthropic",
+      "Your prompts and document text used in response creation are visible to Anthropic",
    ],
    logo: AnthropicLogo,
  },
@ -49,7 +50,7 @@ const LLM_SELECTION_PRIVACY = {
    name: "Google Gemini",
    description: [
      "Your chats are de-identified and used in training",
-      "Your prompts and document text are visible in responses to Google",
+      "Your prompts and document text used in response creation are visible to Google",
    ],
    logo: GeminiLogo,
  },
@ -81,6 +82,14 @@ const LLM_SELECTION_PRIVACY = {
    ],
    logo: AnythingLLMIcon,
  },
+  togetherai: {
+    name: "TogetherAI",
+    description: [
+      "Your chats will not be used for training",
+      "Your prompts and document text used in response creation are visible to TogetherAI",
+    ],
+    logo: TogetherAILogo,
+  },
 };

 const VECTOR_DB_PRIVACY = {
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@ -7,6 +7,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
+import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@ -21,6 +22,7 @@ import System from "@/models/system";
 import paths from "@/utils/paths";
 import showToast from "@/utils/toast";
 import { useNavigate } from "react-router-dom";
+import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";

 const TITLE = "LLM Preference";
 const DESCRIPTION =
@ -100,6 +102,13 @@ export default function LLMPreference({
      options: <LocalAiOptions settings={settings} />,
      description: "Run LLMs locally on your own machine.",
    },
+    {
+      name: "Together AI",
+      value: "togetherai",
+      logo: TogetherAILogo,
+      options: <TogetherAiOptions settings={settings} />,
+      description: "Run open source models from Together AI.",
+    },
    {
      name: "Native",
      value: "native",
--- a/server/.env.example
+++ b/server/.env.example
@ -37,6 +37,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096

+# LLM_PROVIDER='togetherai'
+# TOGETHER_AI_API_KEY='my-together-ai-key'
+# TOGETHER_AI_MODEL_PREF='mistralai/Mixtral-8x7B-Instruct-v0.1'
+
 ###########################################
 ######## Embedding API SELECTION ##########
 ###########################################
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@ -133,6 +133,18 @@ const SystemSettings = {
            OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
            OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,

+            // For embedding credentials when ollama is selected.
+            OpenAiKey: !!process.env.OPEN_AI_KEY,
+            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
+            AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
+            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
+          }
+        : {}),
+      ...(llmProvider === "togetherai"
+        ? {
+            TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
+            TogetherAiModelPref: process.env.TOGETHER_AI_MODEL_PREF,
+
            // For embedding credentials when ollama is selected.
            OpenAiKey: !!process.env.OPEN_AI_KEY,
            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
@ -143,6 +155,12 @@ const SystemSettings = {
      ...(llmProvider === "native"
        ? {
            NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF,
+
+            // For embedding credentials when native is selected.
+            OpenAiKey: !!process.env.OPEN_AI_KEY,
+            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
+            AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
+            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
          }
        : {}),
    };
--- a/server/utils/AiProviders/togetherAi/index.js
+++ b/server/utils/AiProviders/togetherAi/index.js
@ -0,0 +1,198 @@
+const { chatPrompt } = require("../../chats");
+
+function togetherAiModels() {
+  const { MODELS } = require("./models.js");
+  return MODELS || {};
+}
+
+class TogetherAiLLM {
+  constructor(embedder = null) {
+    const { Configuration, OpenAIApi } = require("openai");
+    if (!process.env.TOGETHER_AI_API_KEY)
+      throw new Error("No TogetherAI API key was set.");
+
+    const config = new Configuration({
+      basePath: "https://api.together.xyz/v1",
+      apiKey: process.env.TOGETHER_AI_API_KEY,
+    });
+    this.openai = new OpenAIApi(config);
+    this.model = process.env.TOGETHER_AI_MODEL_PREF;
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    if (!embedder)
+      throw new Error(
+        "INVALID TOGETHER AI SETUP. No embedding engine has been set. Go to instance settings and set up an embedding interface to use Together AI as your LLM."
+      );
+    this.embedder = embedder;
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  allModelInformation() {
+    return togetherAiModels();
+  }
+
+  streamingEnabled() {
+    return "streamChat" in this && "streamGetChatCompletion" in this;
+  }
+
+  // Ensure the user set a value for the token limit
+  // and if undefined - assume 4096 window.
+  promptWindowLimit() {
+    const availableModels = this.allModelInformation();
+    return availableModels[this.model]?.maxLength || 4096;
+  }
+
+  async isValidChatCompletionModel(model = "") {
+    const availableModels = this.allModelInformation();
+    return availableModels.hasOwnProperty(model);
+  }
+
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
+  }
+
+  async isSafe(_input = "") {
+    // Not implemented so must be stubbed
+    return { safe: true, reasons: [] };
+  }
+
+  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    if (!(await this.isValidChatCompletionModel(this.model)))
+      throw new Error(
+        `Together AI chat: ${this.model} is not valid for chat completion!`
+      );
+
+    const textResponse = await this.openai
+      .createChatCompletion({
+        model: this.model,
+        temperature: Number(workspace?.openAiTemp ?? 0.7),
+        n: 1,
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      })
+      .then((json) => {
+        const res = json.data;
+        if (!res.hasOwnProperty("choices"))
+          throw new Error("Together AI chat: No results!");
+        if (res.choices.length === 0)
+          throw new Error("Together AI chat: No results length!");
+        return res.choices[0].message.content;
+      })
+      .catch((error) => {
+        throw new Error(
+          `TogetherAI::createChatCompletion failed with: ${error.message}`
+        );
+      });
+
+    return textResponse;
+  }
+
+  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    if (!(await this.isValidChatCompletionModel(this.model)))
+      throw new Error(
+        `TogetherAI chat: ${this.model} is not valid for chat completion!`
+      );
+
+    const streamRequest = await this.openai.createChatCompletion(
+      {
+        model: this.model,
+        stream: true,
+        temperature: Number(workspace?.openAiTemp ?? 0.7),
+        n: 1,
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      },
+      { responseType: "stream" }
+    );
+    return { type: "togetherAiStream", stream: streamRequest };
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!(await this.isValidChatCompletionModel(this.model)))
+      throw new Error(
+        `TogetherAI chat: ${this.model} is not valid for chat completion!`
+      );
+
+    const { data } = await this.openai.createChatCompletion({
+      model: this.model,
+      messages,
+      temperature,
+    });
+
+    if (!data.hasOwnProperty("choices")) return null;
+    return data.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!(await this.isValidChatCompletionModel(this.model)))
+      throw new Error(
+        `TogetherAI chat: ${this.model} is not valid for chat completion!`
+      );
+
+    const streamRequest = await this.openai.createChatCompletion(
+      {
+        model: this.model,
+        stream: true,
+        messages,
+        temperature,
+      },
+      { responseType: "stream" }
+    );
+    return { type: "togetherAiStream", stream: streamRequest };
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+module.exports = {
+  TogetherAiLLM,
+  togetherAiModels,
+};
--- a/server/utils/AiProviders/togetherAi/models.js
+++ b/server/utils/AiProviders/togetherAi/models.js
@ -0,0 +1,226 @@
+// Catalog of Together AI chat models, keyed by the model string sent to the API.
+// Each entry carries the four columns listed in scripts/chat_models.txt:
+// organization, display name, API model string (`id`) and max sequence length.
+// TogetherAiLLM.promptWindowLimit() reads `maxLength` as the context window size.
+// NOTE(review): presumably generated from scripts/chat_models.txt - confirm
+// before editing entries by hand.
+const MODELS = {
+  "togethercomputer/alpaca-7b": {
+    id: "togethercomputer/alpaca-7b",
+    organization: "Stanford",
+    name: "Alpaca (7B)",
+    maxLength: 2048,
+  },
+  "Austism/chronos-hermes-13b": {
+    id: "Austism/chronos-hermes-13b",
+    organization: "Austism",
+    name: "Chronos Hermes (13B)",
+    maxLength: 2048,
+  },
+  "togethercomputer/CodeLlama-13b-Instruct": {
+    id: "togethercomputer/CodeLlama-13b-Instruct",
+    organization: "Meta",
+    name: "Code Llama Instruct (13B)",
+    maxLength: 8192,
+  },
+  "togethercomputer/CodeLlama-34b-Instruct": {
+    id: "togethercomputer/CodeLlama-34b-Instruct",
+    organization: "Meta",
+    name: "Code Llama Instruct (34B)",
+    maxLength: 8192,
+  },
+  "togethercomputer/CodeLlama-7b-Instruct": {
+    id: "togethercomputer/CodeLlama-7b-Instruct",
+    organization: "Meta",
+    name: "Code Llama Instruct (7B)",
+    maxLength: 8192,
+  },
+  "DiscoResearch/DiscoLM-mixtral-8x7b-v2": {
+    id: "DiscoResearch/DiscoLM-mixtral-8x7b-v2",
+    organization: "DiscoResearch",
+    name: "DiscoLM Mixtral 8x7b",
+    maxLength: 32768,
+  },
+  "togethercomputer/falcon-40b-instruct": {
+    id: "togethercomputer/falcon-40b-instruct",
+    organization: "TII UAE",
+    name: "Falcon Instruct (40B)",
+    maxLength: 2048,
+  },
+  "togethercomputer/falcon-7b-instruct": {
+    id: "togethercomputer/falcon-7b-instruct",
+    organization: "TII UAE",
+    name: "Falcon Instruct (7B)",
+    maxLength: 2048,
+  },
+  "togethercomputer/GPT-NeoXT-Chat-Base-20B": {
+    id: "togethercomputer/GPT-NeoXT-Chat-Base-20B",
+    organization: "Together",
+    name: "GPT-NeoXT-Chat-Base (20B)",
+    maxLength: 2048,
+  },
+  "togethercomputer/llama-2-13b-chat": {
+    id: "togethercomputer/llama-2-13b-chat",
+    organization: "Meta",
+    name: "LLaMA-2 Chat (13B)",
+    maxLength: 4096,
+  },
+  "togethercomputer/llama-2-70b-chat": {
+    id: "togethercomputer/llama-2-70b-chat",
+    organization: "Meta",
+    name: "LLaMA-2 Chat (70B)",
+    maxLength: 4096,
+  },
+  "togethercomputer/llama-2-7b-chat": {
+    id: "togethercomputer/llama-2-7b-chat",
+    organization: "Meta",
+    name: "LLaMA-2 Chat (7B)",
+    maxLength: 4096,
+  },
+  "togethercomputer/Llama-2-7B-32K-Instruct": {
+    id: "togethercomputer/Llama-2-7B-32K-Instruct",
+    organization: "Together",
+    name: "LLaMA-2-7B-32K-Instruct (7B)",
+    maxLength: 32768,
+  },
+  "mistralai/Mistral-7B-Instruct-v0.1": {
+    id: "mistralai/Mistral-7B-Instruct-v0.1",
+    organization: "MistralAI",
+    name: "Mistral (7B) Instruct v0.1",
+    maxLength: 4096,
+  },
+  "mistralai/Mistral-7B-Instruct-v0.2": {
+    id: "mistralai/Mistral-7B-Instruct-v0.2",
+    organization: "MistralAI",
+    name: "Mistral (7B) Instruct v0.2",
+    maxLength: 32768,
+  },
+  "mistralai/Mixtral-8x7B-Instruct-v0.1": {
+    id: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    organization: "MistralAI",
+    name: "Mixtral-8x7B Instruct",
+    maxLength: 32768,
+  },
+  "Gryphe/MythoMax-L2-13b": {
+    id: "Gryphe/MythoMax-L2-13b",
+    organization: "Gryphe",
+    name: "MythoMax-L2 (13B)",
+    maxLength: 4096,
+  },
+  "NousResearch/Nous-Hermes-llama-2-7b": {
+    id: "NousResearch/Nous-Hermes-llama-2-7b",
+    organization: "NousResearch",
+    name: "Nous Hermes LLaMA-2 (7B)",
+    maxLength: 4096,
+  },
+  "NousResearch/Nous-Hermes-Llama2-13b": {
+    id: "NousResearch/Nous-Hermes-Llama2-13b",
+    organization: "NousResearch",
+    name: "Nous Hermes Llama-2 (13B)",
+    maxLength: 4096,
+  },
+  "NousResearch/Nous-Hermes-Llama2-70b": {
+    id: "NousResearch/Nous-Hermes-Llama2-70b",
+    organization: "NousResearch",
+    name: "Nous Hermes Llama-2 (70B)",
+    maxLength: 4096,
+  },
+  "NousResearch/Nous-Hermes-2-Yi-34B": {
+    id: "NousResearch/Nous-Hermes-2-Yi-34B",
+    organization: "NousResearch",
+    name: "Nous Hermes-2 Yi (34B)",
+    maxLength: 4096,
+  },
+  "NousResearch/Nous-Capybara-7B-V1p9": {
+    id: "NousResearch/Nous-Capybara-7B-V1p9",
+    organization: "NousResearch",
+    name: "Nous Capybara v1.9 (7B)",
+    maxLength: 8192,
+  },
+  "openchat/openchat-3.5-1210": {
+    id: "openchat/openchat-3.5-1210",
+    organization: "OpenChat",
+    name: "OpenChat 3.5 1210 (7B)",
+    maxLength: 8192,
+  },
+  "teknium/OpenHermes-2-Mistral-7B": {
+    id: "teknium/OpenHermes-2-Mistral-7B",
+    organization: "teknium",
+    name: "OpenHermes-2-Mistral (7B)",
+    maxLength: 4096,
+  },
+  "teknium/OpenHermes-2p5-Mistral-7B": {
+    id: "teknium/OpenHermes-2p5-Mistral-7B",
+    organization: "teknium",
+    name: "OpenHermes-2.5-Mistral (7B)",
+    maxLength: 4096,
+  },
+  "Open-Orca/Mistral-7B-OpenOrca": {
+    id: "Open-Orca/Mistral-7B-OpenOrca",
+    organization: "OpenOrca",
+    name: "OpenOrca Mistral (7B) 8K",
+    maxLength: 8192,
+  },
+  "garage-bAInd/Platypus2-70B-instruct": {
+    id: "garage-bAInd/Platypus2-70B-instruct",
+    organization: "garage-bAInd",
+    name: "Platypus2 Instruct (70B)",
+    maxLength: 4096,
+  },
+  "togethercomputer/Pythia-Chat-Base-7B-v0.16": {
+    id: "togethercomputer/Pythia-Chat-Base-7B-v0.16",
+    organization: "Together",
+    name: "Pythia-Chat-Base (7B)",
+    maxLength: 2048,
+  },
+  "togethercomputer/Qwen-7B-Chat": {
+    id: "togethercomputer/Qwen-7B-Chat",
+    organization: "Qwen",
+    name: "Qwen-Chat (7B)",
+    maxLength: 8192,
+  },
+  "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {
+    id: "togethercomputer/RedPajama-INCITE-Chat-3B-v1",
+    organization: "Together",
+    name: "RedPajama-INCITE Chat (3B)",
+    maxLength: 2048,
+  },
+  "togethercomputer/RedPajama-INCITE-7B-Chat": {
+    id: "togethercomputer/RedPajama-INCITE-7B-Chat",
+    organization: "Together",
+    name: "RedPajama-INCITE Chat (7B)",
+    maxLength: 2048,
+  },
+  "upstage/SOLAR-0-70b-16bit": {
+    id: "upstage/SOLAR-0-70b-16bit",
+    organization: "Upstage",
+    name: "SOLAR v0 (70B)",
+    maxLength: 4096,
+  },
+  "togethercomputer/StripedHyena-Nous-7B": {
+    id: "togethercomputer/StripedHyena-Nous-7B",
+    organization: "Together",
+    name: "StripedHyena Nous (7B)",
+    maxLength: 32768,
+  },
+  "lmsys/vicuna-7b-v1.5": {
+    id: "lmsys/vicuna-7b-v1.5",
+    organization: "LM Sys",
+    name: "Vicuna v1.5 (7B)",
+    maxLength: 4096,
+  },
+  "lmsys/vicuna-13b-v1.5": {
+    id: "lmsys/vicuna-13b-v1.5",
+    organization: "LM Sys",
+    name: "Vicuna v1.5 (13B)",
+    maxLength: 4096,
+  },
+  "lmsys/vicuna-13b-v1.5-16k": {
+    id: "lmsys/vicuna-13b-v1.5-16k",
+    organization: "LM Sys",
+    name: "Vicuna v1.5 16K (13B)",
+    maxLength: 16384,
+  },
+  "zero-one-ai/Yi-34B-Chat": {
+    id: "zero-one-ai/Yi-34B-Chat",
+    organization: "01.AI",
+    name: "01-ai Yi Chat (34B)",
+    maxLength: 4096,
+  },
+};
+
+module.exports.MODELS = MODELS;
--- a/server/utils/AiProviders/togetherAi/scripts/.gitignore
+++ b/server/utils/AiProviders/togetherAi/scripts/.gitignore
@ -0,0 +1 @@
+*.json
--- a/server/utils/AiProviders/togetherAi/scripts/chat_models.txt
+++ b/server/utils/AiProviders/togetherAi/scripts/chat_models.txt
@ -0,0 +1,39 @@
+| Organization  | Model Name                   | Model String for API                         | Max Seq Length |
+| ------------- | ---------------------------- | -------------------------------------------- | -------------- |
+| Stanford      | Alpaca (7B)                  | togethercomputer/alpaca-7b                   | 2048           |
+| Austism       | Chronos Hermes (13B)         | Austism/chronos-hermes-13b                   | 2048           |
+| Meta          | Code Llama Instruct (13B)    | togethercomputer/CodeLlama-13b-Instruct      | 8192           |
+| Meta          | Code Llama Instruct (34B)    | togethercomputer/CodeLlama-34b-Instruct      | 8192           |
+| Meta          | Code Llama Instruct (7B)     | togethercomputer/CodeLlama-7b-Instruct       | 8192           |
+| DiscoResearch | DiscoLM Mixtral 8x7b         | DiscoResearch/DiscoLM-mixtral-8x7b-v2        | 32768          |
+| TII UAE       | Falcon Instruct (40B)        | togethercomputer/falcon-40b-instruct         | 2048           |
+| TII UAE       | Falcon Instruct (7B)         | togethercomputer/falcon-7b-instruct          | 2048           |
+| Together      | GPT-NeoXT-Chat-Base (20B)    | togethercomputer/GPT-NeoXT-Chat-Base-20B     | 2048           |
+| Meta          | LLaMA-2 Chat (13B)           | togethercomputer/llama-2-13b-chat            | 4096           |
+| Meta          | LLaMA-2 Chat (70B)           | togethercomputer/llama-2-70b-chat            | 4096           |
+| Meta          | LLaMA-2 Chat (7B)            | togethercomputer/llama-2-7b-chat             | 4096           |
+| Together      | LLaMA-2-7B-32K-Instruct (7B) | togethercomputer/Llama-2-7B-32K-Instruct     | 32768          |
+| MistralAI     | Mistral (7B) Instruct v0.1   | mistralai/Mistral-7B-Instruct-v0.1           | 4096           |
+| MistralAI     | Mistral (7B) Instruct v0.2   | mistralai/Mistral-7B-Instruct-v0.2           | 32768          |
+| MistralAI     | Mixtral-8x7B Instruct        | mistralai/Mixtral-8x7B-Instruct-v0.1         | 32768          |
+| Gryphe        | MythoMax-L2 (13B)            | Gryphe/MythoMax-L2-13b                       | 4096           |
+| NousResearch  | Nous Hermes LLaMA-2 (7B)     | NousResearch/Nous-Hermes-llama-2-7b          | 4096           |
+| NousResearch  | Nous Hermes Llama-2 (13B)    | NousResearch/Nous-Hermes-Llama2-13b          | 4096           |
+| NousResearch  | Nous Hermes Llama-2 (70B)    | NousResearch/Nous-Hermes-Llama2-70b          | 4096           |
+| NousResearch  | Nous Hermes-2 Yi (34B)       | NousResearch/Nous-Hermes-2-Yi-34B            | 4096           |
+| NousResearch  | Nous Capybara v1.9 (7B)      | NousResearch/Nous-Capybara-7B-V1p9           | 8192           |
+| OpenChat      | OpenChat 3.5 1210 (7B)       | openchat/openchat-3.5-1210                   | 8192           |
+| teknium       | OpenHermes-2-Mistral (7B)    | teknium/OpenHermes-2-Mistral-7B              | 4096           |
+| teknium       | OpenHermes-2.5-Mistral (7B)  | teknium/OpenHermes-2p5-Mistral-7B            | 4096           |
+| OpenOrca      | OpenOrca Mistral (7B) 8K     | Open-Orca/Mistral-7B-OpenOrca                | 8192           |
+| garage-bAInd  | Platypus2 Instruct (70B)     | garage-bAInd/Platypus2-70B-instruct          | 4096           |
+| Together      | Pythia-Chat-Base (7B)        | togethercomputer/Pythia-Chat-Base-7B-v0.16   | 2048           |
+| Qwen          | Qwen-Chat (7B)               | togethercomputer/Qwen-7B-Chat                | 8192           |
+| Together      | RedPajama-INCITE Chat (3B)   | togethercomputer/RedPajama-INCITE-Chat-3B-v1 | 2048           |
+| Together      | RedPajama-INCITE Chat (7B)   | togethercomputer/RedPajama-INCITE-7B-Chat    | 2048           |
+| Upstage       | SOLAR v0 (70B)               | upstage/SOLAR-0-70b-16bit                    | 4096           |
+| Together      | StripedHyena Nous (7B)       | togethercomputer/StripedHyena-Nous-7B        | 32768          |
+| LM Sys        | Vicuna v1.5 (7B)             | lmsys/vicuna-7b-v1.5                         | 4096           |
+| LM Sys        | Vicuna v1.5 (13B)            | lmsys/vicuna-13b-v1.5                        | 4096           |
+| LM Sys        | Vicuna v1.5 16K (13B)        | lmsys/vicuna-13b-v1.5-16k                    | 16384          |
+| 01.AI         | 01-ai Yi Chat (34B)          | zero-one-ai/Yi-34B-Chat                      | 4096           |
--- a/server/utils/AiProviders/togetherAi/scripts/parse.mjs
+++ b/server/utils/AiProviders/togetherAi/scripts/parse.mjs
@ -0,0 +1,41 @@
+// Together AI does not provide a simple REST API to get models,
+// so we have a table which we copy from their documentation
+// https://docs.together.ai/edit/inference-models that we can
+// then parse and get all models from in a format that makes sense
+// Why this does not exist is so bizarre, but whatever.
+
+// To run, cd into this directory and run `node parse.mjs`
+// copy outputs into the export in ../models.js
+
+// Update the date below if you run this again because TogetherAI added new models.
+// Last Collected: Jan 10, 2024
+
+import fs from "fs";
+
+/**
+ * Parses the markdown table stored in `chat_models.txt` into an object keyed
+ * by model id, writes that object to `chat_models.json`, and returns it.
+ * Both files are resolved relative to the current working directory.
+ *
+ * @returns {Object<string, {id: string, organization: string, name: string, maxLength: number}>}
+ */
+function parseChatModels() {
+  const fixed = {};
+  const tableString = fs.readFileSync("chat_models.txt", { encoding: "utf-8" });
+  // The first two lines are the column header and the `| --- |` separator row.
+  const rows = tableString.split("\n").slice(2);
+
+  for (const row of rows) {
+    // A file ending in a newline yields a trailing empty row; skip it (and any
+    // other blank line) instead of crashing on `undefined.trim()`.
+    if (row.trim() === "") continue;
+
+    // Drop the empty segments outside the leading and trailing pipes.
+    const cells = row
+      .split("|")
+      .slice(1, -1)
+      .map((cell) => cell.trim());
+    // Rows that do not carry all four columns cannot describe a model.
+    if (cells.length < 4) continue;
+
+    const [organization, name, id, maxLength] = cells;
+    fixed[id] = {
+      id,
+      organization,
+      name,
+      maxLength: Number(maxLength),
+    };
+  }
+
+  fs.writeFileSync("chat_models.json", JSON.stringify(fixed, null, 2), "utf-8");
+  return fixed;
+}
+
+parseChatModels();
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@ -262,6 +262,96 @@ function handleStreamResponses(response, stream, responseProps) {
    });
  }

+  // TogetherAI stream handling: each `data` event is split into lines, the
+  // optional `data: ` prefix is stripped, and partial JSON payloads are
+  // stitched together before their tokens are forwarded to the client.
+  // The comparison must be strict equality; an assignment here would always be
+  // truthy and would swallow every stream type handled further below.
+  if (stream.type === "togetherAiStream") {
+    return new Promise((resolve) => {
+      let fullText = "";
+      // Holds the text of a JSON payload that has not been completed yet.
+      let chunk = "";
+      stream.stream.data.on("data", (data) => {
+        // Fall back to no lines when the event carries no data at all.
+        const lines =
+          data
+            ?.toString()
+            ?.split("\n")
+            .filter((line) => line.trim() !== "") ?? [];
+
+        for (const line of lines) {
+          const message = chunk + line.replace(/^data: /, "");
+
+          if (message === "[DONE]") {
+            writeResponseChunk(response, {
+              uuid,
+              sources,
+              type: "textResponseChunk",
+              textResponse: "",
+              close: true,
+              error: false,
+            });
+            resolve(fullText);
+            continue;
+          }
+
+          // JSON chunk may be incomplete and not ended yet, so we need to
+          // stitch it together. You would think JSON chunks would only come
+          // complete - but they don't!
+          let json;
+          try {
+            json = JSON.parse(message);
+          } catch {
+            // `message` already begins with the buffered `chunk`, so it is
+            // stored as-is. Appending it to `chunk` would duplicate the buffer
+            // on every retry and let it grow without bound.
+            // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
+            chunk = message;
+            continue;
+          }
+          chunk = "";
+
+          const token = json?.choices?.[0]?.delta?.content;
+          const finishReason = json?.choices?.[0]?.finish_reason || null;
+
+          if (token) {
+            fullText += token;
+            writeResponseChunk(response, {
+              uuid,
+              sources: [],
+              type: "textResponseChunk",
+              textResponse: token,
+              close: false,
+              error: false,
+            });
+          }
+
+          if (finishReason !== null) {
+            writeResponseChunk(response, {
+              uuid,
+              sources,
+              type: "textResponseChunk",
+              textResponse: "",
+              close: true,
+              error: false,
+            });
+            resolve(fullText);
+          }
+        }
+      });
+    });
+  }
+
  // If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces)
  // we can just iterate the stream content instead.
  if (!stream.hasOwnProperty("data")) {
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@ -1,4 +1,11 @@
-const SUPPORT_CUSTOM_MODELS = ["openai", "localai", "ollama", "native-llm"];
+const { togetherAiModels } = require("../AiProviders/togetherAi");
+const SUPPORT_CUSTOM_MODELS = [
+  "openai",
+  "localai",
+  "ollama",
+  "native-llm",
+  "togetherai",
+];

 async function getCustomModels(provider = "", apiKey = null, basePath = null) {
  if (!SUPPORT_CUSTOM_MODELS.includes(provider))
@ -11,6 +18,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
      return await localAIModels(basePath, apiKey);
    case "ollama":
      return await ollamaAIModels(basePath, apiKey);
+    case "togetherai":
+      return await getTogetherAiModels();
    case "native-llm":
      return nativeLLMModels();
    default:
@ -92,6 +101,21 @@ async function ollamaAIModels(basePath = null, _apiKey = null) {
  return { models, error: null };
 }

+/**
+ * Builds the model list for the TogetherAI provider from the object returned
+ * by `togetherAiModels()`, exposing only the fields needed for selection.
+ *
+ * @returns {Promise<{models: Array<{id: string, organization: string, name: string}>, error: null}>}
+ */
+async function getTogetherAiModels() {
+  const knownModels = togetherAiModels();
+  // Compare the key count itself with zero; negating it first would compare a
+  // boolean against 0, which is never true.
+  if (Object.keys(knownModels).length === 0)
+    return { models: [], error: null };
+
+  const models = Object.values(knownModels).map(
+    ({ id, organization, name }) => ({ id, organization, name })
+  );
+  return { models, error: null };
+}
+
 function nativeLLMModels() {
  const fs = require("fs");
  const path = require("path");
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@ -46,6 +46,9 @@ function getLLMProvider() {
    case "ollama":
      const { OllamaAILLM } = require("../AiProviders/ollama");
      return new OllamaAILLM(embedder);
+    case "togetherai":
+      const { TogetherAiLLM } = require("../AiProviders/togetherAi");
+      return new TogetherAiLLM(embedder);
    case "native":
      const { NativeLLM } = require("../AiProviders/native");
      return new NativeLLM(embedder);
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@ -170,6 +170,16 @@ const KEY_MAPPING = {
    checks: [],
  },

+  // Together Ai Options
+  TogetherAiApiKey: {
+    envKey: "TOGETHER_AI_API_KEY",
+    checks: [isNotEmpty],
+  },
+  TogetherAiModelPref: {
+    envKey: "TOGETHER_AI_MODEL_PREF",
+    checks: [isNotEmpty],
+  },
+
  // System Settings
  AuthToken: {
    envKey: "AUTH_TOKEN",
@ -233,7 +243,7 @@ function validOllamaLLMBasePath(input = "") {
 }

 function supportedLLM(input = "") {
-  return [
+  const validSelection = [
    "openai",
    "azure",
    "anthropic",
@ -242,7 +252,9 @@ function supportedLLM(input = "") {
    "localai",
    "ollama",
    "native",
+    "togetherai",
  ].includes(input);
+  return validSelection ? null : `${input} is not a valid LLM provider.`;
 }

 function validGeminiModel(input = "") {