Merge branch 'master' of github.com:Mintplex-Labs/anything-llm into dark-mode
commit 83b1949774
@@ -219,6 +219,11 @@ GID='1000'
 # TTS_OPEN_AI_KEY=sk-example
 # TTS_OPEN_AI_VOICE_MODEL=nova
 
+# TTS_PROVIDER="generic-openai"
+# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
+# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
+
 # TTS_PROVIDER="elevenlabs"
 # TTS_ELEVEN_LABS_KEY=
 # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
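For context, the four new variables describe an OpenAI-style speech endpoint. Below is a minimal sketch of how they map onto a request, assuming a Node 18+ runtime with global fetch and using the placeholder endpoint, key, and voice values from the example above; none of this is part of the project itself.

// verify-tts.js - manual smoke test against an OpenAI compatible /v1/audio/speech route
const fs = require("fs");

async function main() {
  const response = await fetch("https://api.openai.com/v1/audio/speech", {
    method: "POST",
    headers: {
      Authorization: "Bearer sk-example", // TTS_OPEN_AI_COMPATIBLE_KEY
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "tts-1",
      voice: "nova", // TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL
      input: "Hello from AnythingLLM",
    }),
  });
  fs.writeFileSync("sample.mp3", Buffer.from(await response.arrayBuffer()));
}

main();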
@@ -0,0 +1,69 @@
+import React from "react";
+
+export default function OpenAiGenericTextToSpeechOptions({ settings }) {
+  return (
+    <div className="w-full flex flex-col gap-y-7">
+      <div className="flex gap-x-4">
+        <div className="flex flex-col w-60">
+          <div className="flex justify-between items-center mb-2">
+            <label className="text-white text-sm font-semibold">Base URL</label>
+          </div>
+          <input
+            type="url"
+            name="TTSOpenAICompatibleEndpoint"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="http://localhost:7851/v1"
+            defaultValue={settings?.TTSOpenAICompatibleEndpoint}
+            required={false}
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            This should be the base URL of the OpenAI compatible TTS service you
+            will generate TTS responses from.
+          </p>
+        </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            API Key
+          </label>
+          <input
+            type="password"
+            name="TTSOpenAICompatibleKey"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="API Key"
+            defaultValue={
+              settings?.TTSOpenAICompatibleKey ? "*".repeat(20) : ""
+            }
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Some TTS services require an API key to generate TTS responses -
+            this is optional if your service does not require one.
+          </p>
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            Voice Model
+          </label>
+          <input
+            type="text"
+            name="TTSOpenAICompatibleVoiceModel"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="Your voice model identifier"
+            defaultValue={settings?.TTSOpenAICompatibleVoiceModel}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Most TTS services will have several voice models available, this is
+            the identifier for the voice model you want to use.
+          </p>
+        </div>
+      </div>
+    </div>
+  );
+}
@@ -23,6 +23,7 @@ export default function TTSMessage({ slug, chatId, message }) {
   switch (provider) {
     case "openai":
+    case "generic-openai":
     case "elevenlabs":
       return <AsyncTTSMessage slug={slug} chatId={chatId} />;
     case "piper_local":
@@ -76,11 +76,13 @@ const HistoricalMessage = ({
         <div className="flex flex-col items-center">
           <ProfileImage role={role} workspace={workspace} />
           <div className="mt-1 -mb-10">
-            <TTSMessage
-              slug={workspace?.slug}
-              chatId={chatId}
-              message={message}
-            />
+            {role === "assistant" && (
+              <TTSMessage
+                slug={workspace?.slug}
+                chatId={chatId}
+                message={message}
+              />
+            )}
           </div>
         </div>
         {isEditing ? (
BIN  frontend/src/media/ttsproviders/generic-openai.png (new file, binary file not shown, 29 KiB)
@@ -8,10 +8,13 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
 import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
 import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
 import PiperTTSIcon from "@/media/ttsproviders/piper.png";
+import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
 
 import BrowserNative from "@/components/TextToSpeech/BrowserNative";
 import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
 import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
 import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
+import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";
 
 const PROVIDERS = [
   {
@@ -42,6 +45,14 @@ const PROVIDERS = [
     options: (settings) => <PiperTTSOptions settings={settings} />,
     description: "Run TTS models locally in your browser privately.",
   },
+  {
+    name: "OpenAI Compatible",
+    value: "generic-openai",
+    logo: GenericOpenAiLogo,
+    options: (settings) => <OpenAiGenericTTSOptions settings={settings} />,
+    description:
+      "Connect to an OpenAI compatible TTS service running locally or remotely.",
+  },
 ];
 
 export default function TextToSpeechProvider({ settings }) {
@@ -5,14 +5,30 @@ import paths from "@/utils/paths";
 import { useTranslation } from "react-i18next";
 import { Link, useParams } from "react-router-dom";
 
-// These models do NOT support function calling
+/**
+ * These models do NOT support function calling
+ * or do not support system prompts
+ * and therefore are not supported for agents.
+ * @param {string} provider - The AI provider.
+ * @param {string} model - The model name.
+ * @returns {boolean} Whether the model is supported for agents.
+ */
 function supportedModel(provider, model = "") {
-  if (provider !== "openai") return true;
-  return (
-    ["gpt-3.5-turbo-0301", "gpt-4-turbo-2024-04-09", "gpt-4-turbo"].includes(
-      model
-    ) === false
-  );
+  if (provider === "openai") {
+    return (
+      [
+        "gpt-3.5-turbo-0301",
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo",
+        "o1-preview",
+        "o1-preview-2024-09-12",
+        "o1-mini",
+        "o1-mini-2024-09-12",
+      ].includes(model) === false
+    );
+  }
+
+  return true;
 }
 
 export default function AgentModelSelection({
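For reference, a few illustrative calls showing how the rewritten check behaves; these invocations are hypothetical and only demonstrate the return values.

supportedModel("openai", "o1-mini");           // false - o1 models are excluded from agent use
supportedModel("openai", "gpt-4o");            // true
supportedModel("anthropic", "claude-3-opus");  // true - non-OpenAI providers always pass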
@@ -213,6 +213,11 @@ TTS_PROVIDER="native"
 # TTS_ELEVEN_LABS_KEY=
 # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
 
+# TTS_PROVIDER="generic-openai"
+# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
+# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # STORAGE_DIR= # absolute filesystem path with no trailing slash
@@ -221,12 +221,18 @@ const SystemSettings = {
     TextToSpeechProvider: process.env.TTS_PROVIDER || "native",
     TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY,
     TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL,
 
     // Eleven Labs TTS
     TTSElevenLabsKey: !!process.env.TTS_ELEVEN_LABS_KEY,
     TTSElevenLabsVoiceModel: process.env.TTS_ELEVEN_LABS_VOICE_MODEL,
     // Piper TTS
     TTSPiperTTSVoiceModel:
       process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
+    // OpenAI Generic TTS
+    TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
+    TTSOpenAICompatibleVoiceModel:
+      process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
+    TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+
     // --------------------------------------------------------
     // Agent Settings & Configs
@@ -7,6 +7,20 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 
 // Docs: https://js.langchain.com/v0.2/docs/integrations/chat/bedrock_converse
 class AWSBedrockLLM {
+  /**
+   * These models do not support system prompts
+   * It is not explicitly stated but it is observed that they do not use the system prompt
+   * in their responses and will crash when a system prompt is provided.
+   * We can add more models to this list as we discover them or new models are added.
+   * We may want to extend this list or make a user-config if using custom bedrock models.
+   */
+  noSystemPromptModels = [
+    "amazon.titan-text-express-v1",
+    "amazon.titan-text-lite-v1",
+    "cohere.command-text-v14",
+    "cohere.command-light-text-v14",
+  ];
+
   constructor(embedder = null, modelPreference = null) {
     if (!process.env.AWS_BEDROCK_LLM_ACCESS_KEY_ID)
       throw new Error("No AWS Bedrock LLM profile id was set.");
@@ -59,6 +73,22 @@ class AWSBedrockLLM {
 
     for (const chat of chats) {
       if (!roleToMessageMap.hasOwnProperty(chat.role)) continue;
+
+      // When a model does not support system prompts, we need to handle it.
+      // We will add a new message that simulates the system prompt via a user message and AI response.
+      // This will allow the model to respond without crashing but we can still inject context.
+      if (
+        this.noSystemPromptModels.includes(this.model) &&
+        chat.role === "system"
+      ) {
+        this.#log(
+          `Model does not support system prompts! Simulating system prompt via Human/AI message pairs.`
+        );
+        langchainChats.push(new HumanMessage({ content: chat.content }));
+        langchainChats.push(new AIMessage({ content: "Okay." }));
+        continue;
+      }
+
       const MessageClass = roleToMessageMap[chat.role];
       langchainChats.push(new MessageClass({ content: chat.content }));
     }
@@ -78,6 +108,10 @@ class AWSBedrockLLM {
     );
   }
 
+  #log(text, ...args) {
+    console.log(`\x1b[32m[AWSBedrock]\x1b[0m ${text}`, ...args);
+  }
+
   streamingEnabled() {
     return "streamGetChatCompletion" in this;
   }
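To make the simulation concrete, here is a standalone sketch of the same transform written with plain role objects instead of the LangChain message classes used in the diff; the function name and sample values are illustrative only.

// Replays a system prompt as a user/assistant pair for models that reject system prompts.
function simulateSystemPrompt(chats) {
  const out = [];
  for (const chat of chats) {
    if (chat.role === "system") {
      out.push({ role: "user", content: chat.content });  // system prompt replayed as a user turn
      out.push({ role: "assistant", content: "Okay." });  // stub acknowledgement from the model
      continue;
    }
    out.push(chat); // all other turns pass through unchanged
  }
  return out;
}

// simulateSystemPrompt([
//   { role: "system", content: "You are helpful." },
//   { role: "user", content: "Hi" },
// ]) -> user: "You are helpful.", assistant: "Okay.", user: "Hi"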
@@ -52,6 +52,10 @@ const MODEL_MAP = {
     "gpt-4-turbo-preview": 128_000,
     "gpt-4": 8_192,
     "gpt-4-32k": 32_000,
+    "o1-preview": 128_000,
+    "o1-preview-2024-09-12": 128_000,
+    "o1-mini": 128_000,
+    "o1-mini-2024-09-12": 128_000,
   },
   deepseek: {
     "deepseek-chat": 128_000,
@@ -23,6 +23,14 @@ class OpenAiLLM {
     this.defaultTemp = 0.7;
   }
 
+  /**
+   * Check if the model is an o1 model.
+   * @returns {boolean}
+   */
+  get isO1Model() {
+    return this.model.startsWith("o1");
+  }
+
   #appendContext(contextTexts = []) {
     if (!contextTexts || !contextTexts.length) return "";
     return (
@@ -36,6 +44,7 @@ class OpenAiLLM {
   }
 
   streamingEnabled() {
+    if (this.isO1Model) return false;
     return "streamGetChatCompletion" in this;
   }
 
@@ -98,8 +107,11 @@ class OpenAiLLM {
     userPrompt = "",
     attachments = [], // This is the specific attachment for only this prompt
   }) {
+    // o1 Models do not support the "system" role
+    // in order to combat this, we can use the "user" role as a replacement for now
+    // https://community.openai.com/t/o1-models-do-not-support-system-role-in-chat-completion/953880
     const prompt = {
-      role: "system",
+      role: this.isO1Model ? "user" : "system",
       content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
     };
     return [
@@ -122,7 +134,7 @@ class OpenAiLLM {
       .create({
         model: this.model,
        messages,
-        temperature,
+        temperature: this.isO1Model ? 1 : temperature, // o1 models only accept temperature 1
       })
       .catch((e) => {
         throw new Error(e.message);
@@ -143,7 +155,7 @@ class OpenAiLLM {
       model: this.model,
       stream: true,
       messages,
-      temperature,
+      temperature: this.isO1Model ? 1 : temperature, // o1 models only accept temperature 1
     });
     return streamRequest;
   }
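Taken together, the o1 branches above change three things: the system prompt is sent with the user role, temperature is pinned to 1, and streaming is disabled. Below is a self-contained sketch of that net effect using plain objects; the helper name and sample values are hypothetical.

function buildOpenAiRequest(model, systemPrompt, userPrompt, temperature = 0.7) {
  const isO1 = model.startsWith("o1"); // mirrors the isO1Model getter
  return {
    model,
    messages: [
      { role: isO1 ? "user" : "system", content: systemPrompt }, // o1 rejects the "system" role
      { role: "user", content: userPrompt },
    ],
    temperature: isO1 ? 1 : temperature, // o1 models only accept temperature 1
  };
}

// buildOpenAiRequest("o1-mini", "You are helpful.", "Hi")
//   -> system prompt delivered as a user turn, temperature forced to 1.
// Streaming is handled separately: streamingEnabled() now returns false for o1 models,
// so callers fall back to the non-streaming completion path.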
@@ -7,6 +7,9 @@ function getTTSProvider() {
     case "elevenlabs":
       const { ElevenLabsTTS } = require("./elevenLabs");
       return new ElevenLabsTTS();
+    case "generic-openai":
+      const { GenericOpenAiTTS } = require("./openAiGeneric");
+      return new GenericOpenAiTTS();
     default:
       throw new Error("ENV: No TTS_PROVIDER value found in environment!");
   }
server/utils/TextToSpeech/openAiGeneric/index.js (new file, 50 lines)
@@ -0,0 +1,50 @@
+class GenericOpenAiTTS {
+  constructor() {
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_KEY)
+      this.#log(
+        "No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
+      );
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
+      this.#log(
+        "No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
+      );
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT)
+      throw new Error(
+        "No OpenAI compatible endpoint was set. Please set this to use your OpenAI compatible TTS service."
+      );
+
+    const { OpenAI: OpenAIApi } = require("openai");
+    this.openai = new OpenAIApi({
+      apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
+      baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+    });
+    this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[OpenAiGenericTTS]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * Generates a buffer from the given text input using the OpenAI compatible TTS service.
+   * @param {string} textInput - The text to be converted to audio.
+   * @returns {Promise<Buffer>} A buffer containing the audio data.
+   */
+  async ttsBuffer(textInput) {
+    try {
+      const result = await this.openai.audio.speech.create({
+        model: "tts-1",
+        voice: this.voice,
+        input: textInput,
+      });
+      return Buffer.from(await result.arrayBuffer());
+    } catch (e) {
+      console.error(e);
+    }
+    return null;
+  }
+}
+
+module.exports = {
+  GenericOpenAiTTS,
+};
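A hypothetical usage sketch for the new class, assuming the environment variables from the diff are set (the endpoint is required, the key and voice model are optional) and that the module is required by its repository path from the project root.

const fs = require("fs");
const { GenericOpenAiTTS } = require("./server/utils/TextToSpeech/openAiGeneric");

async function demo() {
  const tts = new GenericOpenAiTTS(); // throws if TTS_OPEN_AI_COMPATIBLE_ENDPOINT is unset
  const buffer = await tts.ttsBuffer("Hello from AnythingLLM");
  if (buffer) fs.writeFileSync("hello.mp3", buffer); // ttsBuffer resolves to null on failure
}

demo();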
@@ -128,7 +128,7 @@ async function openAiModels(apiKey = null) {
   });
 
   const gpts = allModels
-    .filter((model) => model.id.startsWith("gpt"))
+    .filter((model) => model.id.startsWith("gpt") || model.id.startsWith("o1"))
     .filter(
       (model) => !model.id.includes("vision") && !model.id.includes("instruct")
     )
@@ -506,6 +506,20 @@ const KEY_MAPPING = {
     checks: [],
   },
 
+  // OpenAI Generic TTS
+  TTSOpenAICompatibleKey: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
+    checks: [],
+  },
+  TTSOpenAICompatibleVoiceModel: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
+    checks: [isNotEmpty],
+  },
+  TTSOpenAICompatibleEndpoint: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_ENDPOINT",
+    checks: [isValidURL],
+  },
+
   // DeepSeek Options
   DeepSeekApiKey: {
     envKey: "DEEPSEEK_API_KEY",
@@ -589,6 +603,7 @@ function supportedTTSProvider(input = "") {
     "openai",
     "elevenlabs",
     "piper_local",
+    "generic-openai",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid TTS provider.`;
 }