mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-19 04:30:10 +01:00
Tts open ai compatible endpoints (#2487)
* Update OpenAI TTS config to allow a custom BaseURL * uncheck config file * break openai generic TTS into its own provider * add space * hide TTS on user msg --------- Co-authored-by: Adam <phazei@gmail.com>
This commit is contained in:
parent
fa528e0cf3
commit
3dc0f3f490
@ -219,6 +219,11 @@ GID='1000'
|
||||
# TTS_OPEN_AI_KEY=sk-example
|
||||
# TTS_OPEN_AI_VOICE_MODEL=nova
|
||||
|
||||
# TTS_PROVIDER="generic-openai"
|
||||
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
|
||||
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
|
||||
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
|
||||
|
||||
# TTS_PROVIDER="elevenlabs"
|
||||
# TTS_ELEVEN_LABS_KEY=
|
||||
# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
|
||||
|
@ -0,0 +1,69 @@
|
||||
import React from "react";
|
||||
|
||||
export default function OpenAiGenericTextToSpeechOptions({ settings }) {
|
||||
return (
|
||||
<div className="w-full flex flex-col gap-y-7">
|
||||
<div className="flex gap-x-4">
|
||||
<div className="flex flex-col w-60">
|
||||
<div className="flex justify-between items-center mb-2">
|
||||
<label className="text-white text-sm font-semibold">Base URL</label>
|
||||
</div>
|
||||
<input
|
||||
type="url"
|
||||
name="TTSOpenAICompatibleEndpoint"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="http://localhost:7851/v1"
|
||||
defaultValue={settings?.TTSOpenAICompatibleEndpoint}
|
||||
required={false}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||
This should be the base URL of the OpenAI compatible TTS service you
|
||||
will generate TTS responses from.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
API Key
|
||||
</label>
|
||||
<input
|
||||
type="password"
|
||||
name="TTSOpenAICompatibleKey"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="API Key"
|
||||
defaultValue={
|
||||
settings?.TTSOpenAICompatibleKey ? "*".repeat(20) : ""
|
||||
}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||
Some TTS services require an API key to generate TTS responses -
|
||||
this is optional if your service does not require one.
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Voice Model
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
name="TTSOpenAICompatibleVoiceModel"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="Your voice model identifier"
|
||||
defaultValue={settings?.TTSOpenAICompatibleVoiceModel}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||
Most TTS services will have several voice models available, this is
|
||||
the identifier for the voice model you want to use.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
@ -23,6 +23,7 @@ export default function TTSMessage({ slug, chatId, message }) {
|
||||
|
||||
switch (provider) {
|
||||
case "openai":
|
||||
case "generic-openai":
|
||||
case "elevenlabs":
|
||||
return <AsyncTTSMessage slug={slug} chatId={chatId} />;
|
||||
case "piper_local":
|
||||
|
@ -81,11 +81,13 @@ const HistoricalMessage = ({
|
||||
<div className="flex flex-col items-center">
|
||||
<ProfileImage role={role} workspace={workspace} />
|
||||
<div className="mt-1 -mb-10">
|
||||
<TTSMessage
|
||||
slug={workspace?.slug}
|
||||
chatId={chatId}
|
||||
message={message}
|
||||
/>
|
||||
{role === "assistant" && (
|
||||
<TTSMessage
|
||||
slug={workspace?.slug}
|
||||
chatId={chatId}
|
||||
message={message}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
{isEditing ? (
|
||||
|
BIN
frontend/src/media/ttsproviders/generic-openai.png
Normal file
BIN
frontend/src/media/ttsproviders/generic-openai.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
@ -8,10 +8,13 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
|
||||
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
|
||||
import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
|
||||
import PiperTTSIcon from "@/media/ttsproviders/piper.png";
|
||||
import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
|
||||
|
||||
import BrowserNative from "@/components/TextToSpeech/BrowserNative";
|
||||
import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
|
||||
import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
|
||||
import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
|
||||
import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";
|
||||
|
||||
const PROVIDERS = [
|
||||
{
|
||||
@ -42,6 +45,14 @@ const PROVIDERS = [
|
||||
options: (settings) => <PiperTTSOptions settings={settings} />,
|
||||
description: "Run TTS models locally in your browser privately.",
|
||||
},
|
||||
{
|
||||
name: "OpenAI Compatible",
|
||||
value: "generic-openai",
|
||||
logo: GenericOpenAiLogo,
|
||||
options: (settings) => <OpenAiGenericTTSOptions settings={settings} />,
|
||||
description:
|
||||
"Connect to an OpenAI compatible TTS service running locally or remotely.",
|
||||
},
|
||||
];
|
||||
|
||||
export default function TextToSpeechProvider({ settings }) {
|
||||
|
@ -213,6 +213,11 @@ TTS_PROVIDER="native"
|
||||
# TTS_ELEVEN_LABS_KEY=
|
||||
# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
|
||||
|
||||
# TTS_PROVIDER="generic-openai"
|
||||
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
|
||||
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
|
||||
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
|
||||
|
||||
# CLOUD DEPLOYMENT VARIABLES ONLY
|
||||
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||
# STORAGE_DIR= # absolute filesystem path with no trailing slash
|
||||
|
@ -221,12 +221,18 @@ const SystemSettings = {
|
||||
TextToSpeechProvider: process.env.TTS_PROVIDER || "native",
|
||||
TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY,
|
||||
TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL,
|
||||
|
||||
// Eleven Labs TTS
|
||||
TTSElevenLabsKey: !!process.env.TTS_ELEVEN_LABS_KEY,
|
||||
TTSElevenLabsVoiceModel: process.env.TTS_ELEVEN_LABS_VOICE_MODEL,
|
||||
// Piper TTS
|
||||
TTSPiperTTSVoiceModel:
|
||||
process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
|
||||
// OpenAI Generic TTS
|
||||
TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
|
||||
TTSOpenAICompatibleVoiceModel:
|
||||
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
|
||||
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
|
||||
|
||||
// --------------------------------------------------------
|
||||
// Agent Settings & Configs
|
||||
|
@ -7,6 +7,9 @@ function getTTSProvider() {
|
||||
case "elevenlabs":
|
||||
const { ElevenLabsTTS } = require("./elevenLabs");
|
||||
return new ElevenLabsTTS();
|
||||
case "generic-openai":
|
||||
const { GenericOpenAiTTS } = require("./openAiGeneric");
|
||||
return new GenericOpenAiTTS();
|
||||
default:
|
||||
throw new Error("ENV: No TTS_PROVIDER value found in environment!");
|
||||
}
|
||||
|
50
server/utils/TextToSpeech/openAiGeneric/index.js
Normal file
50
server/utils/TextToSpeech/openAiGeneric/index.js
Normal file
@ -0,0 +1,50 @@
|
||||
class GenericOpenAiTTS {
|
||||
constructor() {
|
||||
if (!process.env.TTS_OPEN_AI_COMPATIBLE_KEY)
|
||||
this.#log(
|
||||
"No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
|
||||
);
|
||||
if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
|
||||
this.#log(
|
||||
"No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
|
||||
);
|
||||
if (!process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT)
|
||||
throw new Error(
|
||||
"No OpenAI compatible endpoint was set. Please set this to use your OpenAI compatible TTS service."
|
||||
);
|
||||
|
||||
const { OpenAI: OpenAIApi } = require("openai");
|
||||
this.openai = new OpenAIApi({
|
||||
apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
|
||||
baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
|
||||
});
|
||||
this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
|
||||
}
|
||||
|
||||
#log(text, ...args) {
|
||||
console.log(`\x1b[32m[OpenAiGenericTTS]\x1b[0m ${text}`, ...args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a buffer from the given text input using the OpenAI compatible TTS service.
|
||||
* @param {string} textInput - The text to be converted to audio.
|
||||
* @returns {Promise<Buffer>} A buffer containing the audio data.
|
||||
*/
|
||||
async ttsBuffer(textInput) {
|
||||
try {
|
||||
const result = await this.openai.audio.speech.create({
|
||||
model: "tts-1",
|
||||
voice: this.voice,
|
||||
input: textInput,
|
||||
});
|
||||
return Buffer.from(await result.arrayBuffer());
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
GenericOpenAiTTS,
|
||||
};
|
@ -506,6 +506,20 @@ const KEY_MAPPING = {
|
||||
checks: [],
|
||||
},
|
||||
|
||||
// OpenAI Generic TTS
|
||||
TTSOpenAICompatibleKey: {
|
||||
envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
|
||||
checks: [],
|
||||
},
|
||||
TTSOpenAICompatibleVoiceModel: {
|
||||
envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
TTSOpenAICompatibleEndpoint: {
|
||||
envKey: "TTS_OPEN_AI_COMPATIBLE_ENDPOINT",
|
||||
checks: [isValidURL],
|
||||
},
|
||||
|
||||
// DeepSeek Options
|
||||
DeepSeekApiKey: {
|
||||
envKey: "DEEPSEEK_API_KEY",
|
||||
@ -589,6 +603,7 @@ function supportedTTSProvider(input = "") {
|
||||
"openai",
|
||||
"elevenlabs",
|
||||
"piper_local",
|
||||
"generic-openai",
|
||||
].includes(input);
|
||||
return validSelection ? null : `${input} is not a valid TTS provider.`;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user