Merge branch 'master' of github.com:Mintplex-Labs/anything-llm into dark-mode
commit 83b1949774
@@ -219,6 +219,11 @@ GID='1000'
 # TTS_OPEN_AI_KEY=sk-example
 # TTS_OPEN_AI_VOICE_MODEL=nova
 
+# TTS_PROVIDER="generic-openai"
+# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
+# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
+
 # TTS_PROVIDER="elevenlabs"
 # TTS_ELEVEN_LABS_KEY=
 # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
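For context, the four new variables describe an OpenAI-style speech endpoint. Below is a minimal sketch of how they map onto a request, assuming a Node 18+ runtime with global fetch and using the placeholder endpoint, key, and voice values from the example above; none of this is part of the project itself.

// verify-tts.js - manual smoke test against an OpenAI compatible /v1/audio/speech route
const fs = require("fs");

async function main() {
  const response = await fetch("https://api.openai.com/v1/audio/speech", {
    method: "POST",
    headers: {
      Authorization: "Bearer sk-example", // TTS_OPEN_AI_COMPATIBLE_KEY
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "tts-1",
      voice: "nova", // TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL
      input: "Hello from AnythingLLM",
    }),
  });
  fs.writeFileSync("sample.mp3", Buffer.from(await response.arrayBuffer()));
}

main();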
@@ -0,0 +1,69 @@
+import React from "react";
+
+export default function OpenAiGenericTextToSpeechOptions({ settings }) {
+  return (
+    <div className="w-full flex flex-col gap-y-7">
+      <div className="flex gap-x-4">
+        <div className="flex flex-col w-60">
+          <div className="flex justify-between items-center mb-2">
+            <label className="text-white text-sm font-semibold">Base URL</label>
+          </div>
+          <input
+            type="url"
+            name="TTSOpenAICompatibleEndpoint"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="http://localhost:7851/v1"
+            defaultValue={settings?.TTSOpenAICompatibleEndpoint}
+            required={false}
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            This should be the base URL of the OpenAI compatible TTS service you
+            will generate TTS responses from.
+          </p>
+        </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            API Key
+          </label>
+          <input
+            type="password"
+            name="TTSOpenAICompatibleKey"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="API Key"
+            defaultValue={
+              settings?.TTSOpenAICompatibleKey ? "*".repeat(20) : ""
+            }
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Some TTS services require an API key to generate TTS responses -
+            this is optional if your service does not require one.
+          </p>
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            Voice Model
+          </label>
+          <input
+            type="text"
+            name="TTSOpenAICompatibleVoiceModel"
+            className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="Your voice model identifier"
+            defaultValue={settings?.TTSOpenAICompatibleVoiceModel}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Most TTS services will have several voice models available, this is
+            the identifier for the voice model you want to use.
+          </p>
+        </div>
+      </div>
+    </div>
+  );
+}
@@ -23,6 +23,7 @@ export default function TTSMessage({ slug, chatId, message }) {
   switch (provider) {
     case "openai":
+    case "generic-openai":
     case "elevenlabs":
       return <AsyncTTSMessage slug={slug} chatId={chatId} />;
     case "piper_local":
@@ -76,11 +76,13 @@ const HistoricalMessage = ({
         <div className="flex flex-col items-center">
           <ProfileImage role={role} workspace={workspace} />
           <div className="mt-1 -mb-10">
-            <TTSMessage
-              slug={workspace?.slug}
-              chatId={chatId}
-              message={message}
-            />
+            {role === "assistant" && (
+              <TTSMessage
+                slug={workspace?.slug}
+                chatId={chatId}
+                message={message}
+              />
+            )}
           </div>
         </div>
         {isEditing ? (
BIN  frontend/src/media/ttsproviders/generic-openai.png (new file, binary file not shown, 29 KiB)
@@ -8,10 +8,13 @@ import OpenAiLogo from "@/media/llmprovider/openai.png";
 import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
 import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
 import PiperTTSIcon from "@/media/ttsproviders/piper.png";
+import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
 
 import BrowserNative from "@/components/TextToSpeech/BrowserNative";
 import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
 import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
 import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
+import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";
 
 const PROVIDERS = [
   {
@@ -42,6 +45,14 @@ const PROVIDERS = [
     options: (settings) => <PiperTTSOptions settings={settings} />,
     description: "Run TTS models locally in your browser privately.",
   },
+  {
+    name: "OpenAI Compatible",
+    value: "generic-openai",
+    logo: GenericOpenAiLogo,
+    options: (settings) => <OpenAiGenericTTSOptions settings={settings} />,
+    description:
+      "Connect to an OpenAI compatible TTS service running locally or remotely.",
+  },
 ];
 
 export default function TextToSpeechProvider({ settings }) {
@@ -5,14 +5,30 @@ import paths from "@/utils/paths";
 import { useTranslation } from "react-i18next";
 import { Link, useParams } from "react-router-dom";
 
-// These models do NOT support function calling
+/**
+ * These models do NOT support function calling
+ * or do not support system prompts
+ * and therefore are not supported for agents.
+ * @param {string} provider - The AI provider.
+ * @param {string} model - The model name.
+ * @returns {boolean} Whether the model is supported for agents.
+ */
 function supportedModel(provider, model = "") {
-  if (provider !== "openai") return true;
-  return (
-    ["gpt-3.5-turbo-0301", "gpt-4-turbo-2024-04-09", "gpt-4-turbo"].includes(
-      model
-    ) === false
-  );
+  if (provider === "openai") {
+    return (
+      [
+        "gpt-3.5-turbo-0301",
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo",
+        "o1-preview",
+        "o1-preview-2024-09-12",
+        "o1-mini",
+        "o1-mini-2024-09-12",
+      ].includes(model) === false
+    );
+  }
+
+  return true;
 }
 
 export default function AgentModelSelection({
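For reference, a few illustrative calls showing how the rewritten check behaves; these invocations are hypothetical and only demonstrate the return values.

supportedModel("openai", "o1-mini");           // false - o1 models are excluded from agent use
supportedModel("openai", "gpt-4o");            // true
supportedModel("anthropic", "claude-3-opus");  // true - non-OpenAI providers always pass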
@@ -213,6 +213,11 @@ TTS_PROVIDER="native"
 # TTS_ELEVEN_LABS_KEY=
 # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
 
+# TTS_PROVIDER="generic-openai"
+# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
+# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # STORAGE_DIR= # absolute filesystem path with no trailing slash
@@ -221,12 +221,18 @@ const SystemSettings = {
     TextToSpeechProvider: process.env.TTS_PROVIDER || "native",
     TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY,
     TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL,
 
     // Eleven Labs TTS
     TTSElevenLabsKey: !!process.env.TTS_ELEVEN_LABS_KEY,
     TTSElevenLabsVoiceModel: process.env.TTS_ELEVEN_LABS_VOICE_MODEL,
     // Piper TTS
     TTSPiperTTSVoiceModel:
       process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
+    // OpenAI Generic TTS
+    TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
+    TTSOpenAICompatibleVoiceModel:
+      process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
+    TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+
     // --------------------------------------------------------
     // Agent Settings & Configs
@@ -7,6 +7,20 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 
 // Docs: https://js.langchain.com/v0.2/docs/integrations/chat/bedrock_converse
 class AWSBedrockLLM {
+  /**
+   * These models do not support system prompts
+   * It is not explicitly stated but it is observed that they do not use the system prompt
+   * in their responses and will crash when a system prompt is provided.
+   * We can add more models to this list as we discover them or new models are added.
+   * We may want to extend this list or make a user-config if using custom bedrock models.
+   */
+  noSystemPromptModels = [
+    "amazon.titan-text-express-v1",
+    "amazon.titan-text-lite-v1",
+    "cohere.command-text-v14",
+    "cohere.command-light-text-v14",
+  ];
+
   constructor(embedder = null, modelPreference = null) {
     if (!process.env.AWS_BEDROCK_LLM_ACCESS_KEY_ID)
       throw new Error("No AWS Bedrock LLM profile id was set.");
@@ -59,6 +73,22 @@ class AWSBedrockLLM {
 
     for (const chat of chats) {
       if (!roleToMessageMap.hasOwnProperty(chat.role)) continue;
+
+      // When a model does not support system prompts, we need to handle it.
+      // We will add a new message that simulates the system prompt via a user message and AI response.
+      // This will allow the model to respond without crashing but we can still inject context.
+      if (
+        this.noSystemPromptModels.includes(this.model) &&
+        chat.role === "system"
+      ) {
+        this.#log(
+          `Model does not support system prompts! Simulating system prompt via Human/AI message pairs.`
+        );
+        langchainChats.push(new HumanMessage({ content: chat.content }));
+        langchainChats.push(new AIMessage({ content: "Okay." }));
+        continue;
+      }
+
       const MessageClass = roleToMessageMap[chat.role];
       langchainChats.push(new MessageClass({ content: chat.content }));
     }
@@ -78,6 +108,10 @@ class AWSBedrockLLM {
     );
   }
 
+  #log(text, ...args) {
+    console.log(`\x1b[32m[AWSBedrock]\x1b[0m ${text}`, ...args);
+  }
+
   streamingEnabled() {
     return "streamGetChatCompletion" in this;
   }
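To make the simulation concrete, here is a standalone sketch of the same transform written with plain role objects instead of the LangChain message classes used in the diff; the function name and sample values are illustrative only.

// Replays a system prompt as a user/assistant pair for models that reject system prompts.
function simulateSystemPrompt(chats) {
  const out = [];
  for (const chat of chats) {
    if (chat.role === "system") {
      out.push({ role: "user", content: chat.content });  // system prompt replayed as a user turn
      out.push({ role: "assistant", content: "Okay." });  // stub acknowledgement from the model
      continue;
    }
    out.push(chat); // all other turns pass through unchanged
  }
  return out;
}

// simulateSystemPrompt([
//   { role: "system", content: "You are helpful." },
//   { role: "user", content: "Hi" },
// ]) -> user: "You are helpful.", assistant: "Okay.", user: "Hi"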
@@ -52,6 +52,10 @@ const MODEL_MAP = {
     "gpt-4-turbo-preview": 128_000,
     "gpt-4": 8_192,
     "gpt-4-32k": 32_000,
+    "o1-preview": 128_000,
+    "o1-preview-2024-09-12": 128_000,
+    "o1-mini": 128_000,
+    "o1-mini-2024-09-12": 128_000,
   },
   deepseek: {
     "deepseek-chat": 128_000,
@@ -23,6 +23,14 @@ class OpenAiLLM {
     this.defaultTemp = 0.7;
   }
 
+  /**
+   * Check if the model is an o1 model.
+   * @returns {boolean}
+   */
+  get isO1Model() {
+    return this.model.startsWith("o1");
+  }
+
   #appendContext(contextTexts = []) {
     if (!contextTexts || !contextTexts.length) return "";
     return (
@@ -36,6 +44,7 @@ class OpenAiLLM {
   }
 
   streamingEnabled() {
+    if (this.isO1Model) return false;
     return "streamGetChatCompletion" in this;
   }
 
@@ -98,8 +107,11 @@ class OpenAiLLM {
     userPrompt = "",
     attachments = [], // This is the specific attachment for only this prompt
   }) {
+    // o1 Models do not support the "system" role
+    // in order to combat this, we can use the "user" role as a replacement for now
+    // https://community.openai.com/t/o1-models-do-not-support-system-role-in-chat-completion/953880
     const prompt = {
-      role: "system",
+      role: this.isO1Model ? "user" : "system",
       content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
     };
     return [
@@ -122,7 +134,7 @@ class OpenAiLLM {
       .create({
         model: this.model,
        messages,
-        temperature,
+        temperature: this.isO1Model ? 1 : temperature, // o1 models only accept temperature 1
       })
       .catch((e) => {
         throw new Error(e.message);
@@ -143,7 +155,7 @@ class OpenAiLLM {
       model: this.model,
       stream: true,
       messages,
-      temperature,
+      temperature: this.isO1Model ? 1 : temperature, // o1 models only accept temperature 1
     });
     return streamRequest;
   }
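Taken together, the o1 branches above change three things: the system prompt is sent with the user role, temperature is pinned to 1, and streaming is disabled. Below is a self-contained sketch of that net effect using plain objects; the helper name and sample values are hypothetical.

function buildOpenAiRequest(model, systemPrompt, userPrompt, temperature = 0.7) {
  const isO1 = model.startsWith("o1"); // mirrors the isO1Model getter
  return {
    model,
    messages: [
      { role: isO1 ? "user" : "system", content: systemPrompt }, // o1 rejects the "system" role
      { role: "user", content: userPrompt },
    ],
    temperature: isO1 ? 1 : temperature, // o1 models only accept temperature 1
  };
}

// buildOpenAiRequest("o1-mini", "You are helpful.", "Hi")
//   -> system prompt delivered as a user turn, temperature forced to 1.
// Streaming is handled separately: streamingEnabled() now returns false for o1 models,
// so callers fall back to the non-streaming completion path.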
@@ -7,6 +7,9 @@ function getTTSProvider() {
     case "elevenlabs":
       const { ElevenLabsTTS } = require("./elevenLabs");
       return new ElevenLabsTTS();
+    case "generic-openai":
+      const { GenericOpenAiTTS } = require("./openAiGeneric");
+      return new GenericOpenAiTTS();
     default:
       throw new Error("ENV: No TTS_PROVIDER value found in environment!");
   }
server/utils/TextToSpeech/openAiGeneric/index.js (new file, 50 lines)
@@ -0,0 +1,50 @@
+class GenericOpenAiTTS {
+  constructor() {
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_KEY)
+      this.#log(
+        "No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
+      );
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
+      this.#log(
+        "No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
+      );
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT)
+      throw new Error(
+        "No OpenAI compatible endpoint was set. Please set this to use your OpenAI compatible TTS service."
+      );
+
+    const { OpenAI: OpenAIApi } = require("openai");
+    this.openai = new OpenAIApi({
+      apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
+      baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+    });
+    this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[OpenAiGenericTTS]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * Generates a buffer from the given text input using the OpenAI compatible TTS service.
+   * @param {string} textInput - The text to be converted to audio.
+   * @returns {Promise<Buffer>} A buffer containing the audio data.
+   */
+  async ttsBuffer(textInput) {
+    try {
+      const result = await this.openai.audio.speech.create({
+        model: "tts-1",
+        voice: this.voice,
+        input: textInput,
+      });
+      return Buffer.from(await result.arrayBuffer());
+    } catch (e) {
+      console.error(e);
+    }
+    return null;
+  }
+}
+
+module.exports = {
+  GenericOpenAiTTS,
+};
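A hypothetical usage sketch for the new class, assuming the environment variables from the diff are set (the endpoint is required, the key and voice model are optional) and that the module is required by its repository path from the project root.

const fs = require("fs");
const { GenericOpenAiTTS } = require("./server/utils/TextToSpeech/openAiGeneric");

async function demo() {
  const tts = new GenericOpenAiTTS(); // throws if TTS_OPEN_AI_COMPATIBLE_ENDPOINT is unset
  const buffer = await tts.ttsBuffer("Hello from AnythingLLM");
  if (buffer) fs.writeFileSync("hello.mp3", buffer); // ttsBuffer resolves to null on failure
}

demo();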
@@ -128,7 +128,7 @@ async function openAiModels(apiKey = null) {
   });
 
   const gpts = allModels
-    .filter((model) => model.id.startsWith("gpt"))
+    .filter((model) => model.id.startsWith("gpt") || model.id.startsWith("o1"))
     .filter(
       (model) => !model.id.includes("vision") && !model.id.includes("instruct")
     )
@@ -506,6 +506,20 @@ const KEY_MAPPING = {
     checks: [],
   },
 
+  // OpenAI Generic TTS
+  TTSOpenAICompatibleKey: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
+    checks: [],
+  },
+  TTSOpenAICompatibleVoiceModel: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
+    checks: [isNotEmpty],
+  },
+  TTSOpenAICompatibleEndpoint: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_ENDPOINT",
+    checks: [isValidURL],
+  },
+
   // DeepSeek Options
   DeepSeekApiKey: {
     envKey: "DEEPSEEK_API_KEY",
@@ -589,6 +603,7 @@ function supportedTTSProvider(input = "") {
     "openai",
     "elevenlabs",
     "piper_local",
+    "generic-openai",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid TTS provider.`;
 }