Merge branch 'master' of github.com:Mintplex-Labs/anything-llm into dark-mode

timothycarambat 2024-10-16 14:20:23 -07:00
commit 83b1949774
16 changed files with 249 additions and 16 deletions

View File

@@ -219,6 +219,11 @@ GID='1000'
 # TTS_OPEN_AI_KEY=sk-example
 # TTS_OPEN_AI_VOICE_MODEL=nova
+# TTS_PROVIDER="generic-openai"
+# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
+# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
+
 # TTS_PROVIDER="elevenlabs"
 # TTS_ELEVEN_LABS_KEY=
 # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
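These variables (here and in the matching block later in this commit) point AnythingLLM at any service that implements the OpenAI-style /v1/audio/speech route. As a quick smoke test of such an endpoint, a sketch using the openai npm package — the fallback key/endpoint values mirror the placeholders above and are assumptions, not part of this commit:

// Sketch: confirm an OpenAI-compatible TTS endpoint answers with audio bytes.
const { OpenAI } = require("openai");

const client = new OpenAI({
  apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || "sk-example",
  baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT || "https://api.openai.com/v1",
});

client.audio.speech
  .create({ model: "tts-1", voice: "nova", input: "ping" })
  .then(async (res) => {
    const bytes = (await res.arrayBuffer()).byteLength;
    console.log(`TTS endpoint OK: received ${bytes} bytes of audio`);
  })
  .catch((e) => console.error("TTS endpoint check failed:", e.message));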

View File

@@ -0,0 +1,69 @@
import React from "react";

export default function OpenAiGenericTextToSpeechOptions({ settings }) {
  return (
    <div className="w-full flex flex-col gap-y-7">
      <div className="flex gap-x-4">
        <div className="flex flex-col w-60">
          <div className="flex justify-between items-center mb-2">
            <label className="text-white text-sm font-semibold">
              Base URL
            </label>
          </div>
          <input
            type="url"
            name="TTSOpenAICompatibleEndpoint"
            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
            placeholder="http://localhost:7851/v1"
            defaultValue={settings?.TTSOpenAICompatibleEndpoint}
            required={false}
            autoComplete="off"
            spellCheck={false}
          />
          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
            This should be the base URL of the OpenAI compatible TTS service
            you will generate TTS responses from.
          </p>
        </div>
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-3">
            API Key
          </label>
          <input
            type="password"
            name="TTSOpenAICompatibleKey"
            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
            placeholder="API Key"
            defaultValue={settings?.TTSOpenAICompatibleKey ? "*".repeat(20) : ""}
            autoComplete="off"
            spellCheck={false}
          />
          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
            Some TTS services require an API key to generate TTS responses -
            this is optional if your service does not require one.
          </p>
        </div>
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-3">
            Voice Model
          </label>
          <input
            type="text"
            name="TTSOpenAICompatibleVoiceModel"
            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
            placeholder="Your voice model identifier"
            defaultValue={settings?.TTSOpenAICompatibleVoiceModel}
            required={true}
            autoComplete="off"
            spellCheck={false}
          />
          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
            Most TTS services will have several voice models available; this is
            the identifier for the voice model you want to use.
          </p>
        </div>
      </div>
    </div>
  );
}

View File

@@ -23,6 +23,7 @@ export default function TTSMessage({ slug, chatId, message }) {
   switch (provider) {
     case "openai":
+    case "generic-openai":
     case "elevenlabs":
       return <AsyncTTSMessage slug={slug} chatId={chatId} />;
     case "piper_local":

View File

@@ -76,11 +76,13 @@ const HistoricalMessage = ({
       <div className="flex flex-col items-center">
         <ProfileImage role={role} workspace={workspace} />
         <div className="mt-1 -mb-10">
-          <TTSMessage
-            slug={workspace?.slug}
-            chatId={chatId}
-            message={message}
-          />
+          {role === "assistant" && (
+            <TTSMessage
+              slug={workspace?.slug}
+              chatId={chatId}
+              message={message}
+            />
+          )}
         </div>
       </div>
       {isEditing ? (

Binary file not shown. (New image, 29 KiB: the "generic-openai" TTS provider logo imported below.)

View File

@@ -8,10 +8,13 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
 import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
 import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
 import PiperTTSIcon from "@/media/ttsproviders/piper.png";
+import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
 import BrowserNative from "@/components/TextToSpeech/BrowserNative";
 import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
 import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
 import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
+import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";

 const PROVIDERS = [
   {
@@ -42,6 +45,14 @@ const PROVIDERS = [
     options: (settings) => <PiperTTSOptions settings={settings} />,
     description: "Run TTS models locally in your browser privately.",
   },
+  {
+    name: "OpenAI Compatible",
+    value: "generic-openai",
+    logo: GenericOpenAiLogo,
+    options: (settings) => <OpenAiGenericTTSOptions settings={settings} />,
+    description:
+      "Connect to an OpenAI compatible TTS service running locally or remotely.",
+  },
 ];

 export default function TextToSpeechProvider({ settings }) {

View File

@@ -5,14 +5,30 @@ import paths from "@/utils/paths";
 import { useTranslation } from "react-i18next";
 import { Link, useParams } from "react-router-dom";

-// These models do NOT support function calling
+/**
+ * These models do NOT support function calling
+ * or do not support system prompts
+ * and therefore are not supported for agents.
+ * @param {string} provider - The AI provider.
+ * @param {string} model - The model name.
+ * @returns {boolean} Whether the model is supported for agents.
+ */
 function supportedModel(provider, model = "") {
-  if (provider !== "openai") return true;
-  return (
-    ["gpt-3.5-turbo-0301", "gpt-4-turbo-2024-04-09", "gpt-4-turbo"].includes(
-      model
-    ) === false
-  );
+  if (provider === "openai") {
+    return (
+      [
+        "gpt-3.5-turbo-0301",
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo",
+        "o1-preview",
+        "o1-preview-2024-09-12",
+        "o1-mini",
+        "o1-mini-2024-09-12",
+      ].includes(model) === false
+    );
+  }
+  return true;
 }

 export default function AgentModelSelection({
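For a quick sense of the rewritten guard's behavior (hypothetical spot checks, not part of the commit):

// Non-OpenAI providers always pass this guard.
console.assert(supportedModel("anthropic", "claude-3-5-sonnet") === true);
// OpenAI models pass unless they appear on the block list above.
console.assert(supportedModel("openai", "gpt-4o") === true);
// o1-series models are blocked from agent use.
console.assert(supportedModel("openai", "o1-mini") === false);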

View File

@@ -213,6 +213,11 @@ TTS_PROVIDER="native"
 # TTS_ELEVEN_LABS_KEY=
 # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
+# TTS_PROVIDER="generic-openai"
+# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
+# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
+
 # CLOUD DEPLOYMENT VARIABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # STORAGE_DIR= # absolute filesystem path with no trailing slash

View File

@@ -221,12 +221,18 @@ const SystemSettings = {
       TextToSpeechProvider: process.env.TTS_PROVIDER || "native",
       TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY,
       TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL,

       // Eleven Labs TTS
       TTSElevenLabsKey: !!process.env.TTS_ELEVEN_LABS_KEY,
       TTSElevenLabsVoiceModel: process.env.TTS_ELEVEN_LABS_VOICE_MODEL,

       // Piper TTS
       TTSPiperTTSVoiceModel:
         process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
+
+      // OpenAI Generic TTS
+      TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
+      TTSOpenAICompatibleVoiceModel:
+        process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
+      TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,

       // --------------------------------------------------------
       // Agent Settings & Configs

View File

@@ -7,6 +7,20 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 // Docs: https://js.langchain.com/v0.2/docs/integrations/chat/bedrock_converse
 class AWSBedrockLLM {
+  /**
+   * These models do not support system prompts.
+   * It is not explicitly stated, but it is observed that they do not use the
+   * system prompt in their responses and will crash when a system prompt is provided.
+   * We can add more models to this list as we discover them or as new models are added.
+   * We may want to extend this list or add a user config for custom Bedrock models.
+   */
+  noSystemPromptModels = [
+    "amazon.titan-text-express-v1",
+    "amazon.titan-text-lite-v1",
+    "cohere.command-text-v14",
+    "cohere.command-light-text-v14",
+  ];
+
   constructor(embedder = null, modelPreference = null) {
     if (!process.env.AWS_BEDROCK_LLM_ACCESS_KEY_ID)
       throw new Error("No AWS Bedrock LLM profile id was set.");
@@ -59,6 +73,22 @@ class AWSBedrockLLM {
     for (const chat of chats) {
       if (!roleToMessageMap.hasOwnProperty(chat.role)) continue;

+      // When a model does not support system prompts we simulate one:
+      // push a user message carrying the system prompt plus a canned AI reply.
+      // The model can then respond without crashing while we still inject context.
+      if (
+        this.noSystemPromptModels.includes(this.model) &&
+        chat.role === "system"
+      ) {
+        this.#log(
+          `Model does not support system prompts! Simulating system prompt via Human/AI message pairs.`
+        );
+        langchainChats.push(new HumanMessage({ content: chat.content }));
+        langchainChats.push(new AIMessage({ content: "Okay." }));
+        continue;
+      }
+
       const MessageClass = roleToMessageMap[chat.role];
       langchainChats.push(new MessageClass({ content: chat.content }));
     }
@@ -78,6 +108,10 @@ class AWSBedrockLLM {
     );
   }

+  #log(text, ...args) {
+    console.log(`\x1b[32m[AWSBedrock]\x1b[0m ${text}`, ...args);
+  }
+
   streamingEnabled() {
     return "streamGetChatCompletion" in this;
   }
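To make the Human/AI pairing concrete, a history for a no-system-prompt model is rewritten roughly as follows (illustrative content only, not part of the commit):

// Input chats for, e.g., "cohere.command-text-v14":
//   [{ role: "system", content: "Answer tersely." }, { role: "user", content: "Hi" }]
// Resulting langchainChats:
//   [
//     new HumanMessage({ content: "Answer tersely." }), // simulated system prompt
//     new AIMessage({ content: "Okay." }),              // canned acknowledgement
//     new HumanMessage({ content: "Hi" }),              // normal user turn
//   ]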

View File

@@ -52,6 +52,10 @@ const MODEL_MAP = {
     "gpt-4-turbo-preview": 128_000,
     "gpt-4": 8_192,
     "gpt-4-32k": 32_000,
+    "o1-preview": 128_000,
+    "o1-preview-2024-09-12": 128_000,
+    "o1-mini": 128_000,
+    "o1-mini-2024-09-12": 128_000,
   },
   deepseek: {
     "deepseek-chat": 128_000,

View File

@@ -23,6 +23,14 @@ class OpenAiLLM {
     this.defaultTemp = 0.7;
   }

+  /**
+   * Check if the model is an o1 model.
+   * @returns {boolean}
+   */
+  get isO1Model() {
+    return this.model.startsWith("o1");
+  }
+
   #appendContext(contextTexts = []) {
     if (!contextTexts || !contextTexts.length) return "";
     return (
@@ -36,6 +44,7 @@ class OpenAiLLM {
   }

   streamingEnabled() {
+    if (this.isO1Model) return false;
     return "streamGetChatCompletion" in this;
   }

@@ -98,8 +107,11 @@ class OpenAiLLM {
     userPrompt = "",
     attachments = [], // This is the specific attachment for only this prompt
   }) {
+    // o1 models do not support the "system" role;
+    // to work around this we use the "user" role as a replacement for now.
+    // https://community.openai.com/t/o1-models-do-not-support-system-role-in-chat-completion/953880
     const prompt = {
-      role: "system",
+      role: this.isO1Model ? "user" : "system",
       content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
     };
     return [
@@ -122,7 +134,7 @@ class OpenAiLLM {
       .create({
         model: this.model,
         messages,
-        temperature,
+        temperature: this.isO1Model ? 1 : temperature, // o1 models only accept temperature 1
       })
       .catch((e) => {
         throw new Error(e.message);
@@ -143,7 +155,7 @@ class OpenAiLLM {
       model: this.model,
       stream: true,
       messages,
-      temperature,
+      temperature: this.isO1Model ? 1 : temperature, // o1 models only accept temperature 1
     });
     return streamRequest;
   }
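Net effect of the o1 guards for a model like "o1-mini" (an illustration, not part of the commit): streaming is reported as unavailable, so callers fall back to the non-streaming path, and the outbound request looks roughly like:

// {
//   model: "o1-mini",
//   messages: [
//     { role: "user", content: systemPrompt + context }, // "system" swapped for "user"
//     ...chatHistory,
//     { role: "user", content: userPrompt },
//   ],
//   temperature: 1, // forced; o1 models reject other values
// }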

View File

@@ -7,6 +7,9 @@ function getTTSProvider() {
     case "elevenlabs":
       const { ElevenLabsTTS } = require("./elevenLabs");
       return new ElevenLabsTTS();
+    case "generic-openai":
+      const { GenericOpenAiTTS } = require("./openAiGeneric");
+      return new GenericOpenAiTTS();
     default:
       throw new Error("ENV: No TTS_PROVIDER value found in environment!");
   }
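Callers keep resolving providers through this factory; with TTS_PROVIDER="generic-openai" it now yields the new class (a sketch; the require path and named export are assumptions):

// Sketch: resolving the configured TTS provider elsewhere on the server.
const { getTTSProvider } = require("../TextToSpeech"); // path is an assumption
const provider = getTTSProvider(); // GenericOpenAiTTS when TTS_PROVIDER="generic-openai"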

View File

@@ -0,0 +1,50 @@
class GenericOpenAiTTS {
  constructor() {
    if (!process.env.TTS_OPEN_AI_COMPATIBLE_KEY)
      this.#log(
        "No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
      );
    if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
      this.#log(
        "No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
      );
    if (!process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT)
      throw new Error(
        "No OpenAI compatible endpoint was set. Please set this to use your OpenAI compatible TTS service."
      );

    const { OpenAI: OpenAIApi } = require("openai");
    this.openai = new OpenAIApi({
      apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
      baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
    });
    this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
  }

  #log(text, ...args) {
    console.log(`\x1b[32m[OpenAiGenericTTS]\x1b[0m ${text}`, ...args);
  }

  /**
   * Generates a buffer from the given text input using the OpenAI compatible TTS service.
   * @param {string} textInput - The text to be converted to audio.
   * @returns {Promise<Buffer>} A buffer containing the audio data.
   */
  async ttsBuffer(textInput) {
    try {
      const result = await this.openai.audio.speech.create({
        model: "tts-1",
        voice: this.voice,
        input: textInput,
      });
      return Buffer.from(await result.arrayBuffer());
    } catch (e) {
      console.error(e);
    }
    return null;
  }
}

module.exports = {
  GenericOpenAiTTS,
};
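A minimal end-to-end sketch of the new class against a local OpenAI-compatible server (the endpoint, voice, and output filename are assumptions; the audio format depends on the backend):

// Sketch: generate one clip and write it to disk.
process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT ||= "http://localhost:7851/v1";
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ||= "nova";

const fs = require("fs");
const { GenericOpenAiTTS } = require("./openAiGeneric"); // path is an assumption

(async () => {
  const tts = new GenericOpenAiTTS();
  const buffer = await tts.ttsBuffer("AnythingLLM text to speech check.");
  if (buffer) fs.writeFileSync("tts-check.mp3", buffer); // extension depends on backend output
  else console.error("No audio returned; check the endpoint logs.");
})();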

View File

@@ -128,7 +128,7 @@ async function openAiModels(apiKey = null) {
   });

   const gpts = allModels
-    .filter((model) => model.id.startsWith("gpt"))
+    .filter((model) => model.id.startsWith("gpt") || model.id.startsWith("o1"))
     .filter(
       (model) => !model.id.includes("vision") && !model.id.includes("instruct")
     )

View File

@@ -506,6 +506,20 @@ const KEY_MAPPING = {
     checks: [],
   },

+  // OpenAI Generic TTS
+  TTSOpenAICompatibleKey: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
+    checks: [],
+  },
+  TTSOpenAICompatibleVoiceModel: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
+    checks: [isNotEmpty],
+  },
+  TTSOpenAICompatibleEndpoint: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_ENDPOINT",
+    checks: [isValidURL],
+  },
+
   // DeepSeek Options
   DeepSeekApiKey: {
     envKey: "DEEPSEEK_API_KEY",
@@ -589,6 +603,7 @@ function supportedTTSProvider(input = "") {
     "openai",
     "elevenlabs",
     "piper_local",
+    "generic-openai",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid TTS provider.`;
 }
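For context on the checks arrays: validators of this shape typically run each check against the submitted value and collect error strings (a hedged sketch of the pattern; the exact helper in this repo may differ):

// Sketch: applying a KEY_MAPPING entry's checks to a candidate value.
function validate(entry, value) {
  const errors = entry.checks.map((check) => check(value)).filter(Boolean);
  return errors.length === 0 ? null : errors.join(", "); // null means the value is accepted
}

// validate(KEY_MAPPING.TTSOpenAICompatibleEndpoint, "https://api.openai.com/v1") -> null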