mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-10 17:00:11 +01:00
Add Speech-to-text and Text-to-speech providers (#1394)
* Add Speech-to-text and Text-to-speech providers * add files and update comment * update comments * patch: bad playerRef check
This commit is contained in:
parent
d71db22799
commit
b6be43be95
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@ -11,6 +11,7 @@
|
|||||||
"cooldowns",
|
"cooldowns",
|
||||||
"Deduplicator",
|
"Deduplicator",
|
||||||
"Dockerized",
|
"Dockerized",
|
||||||
|
"elevenlabs",
|
||||||
"Embeddable",
|
"Embeddable",
|
||||||
"epub",
|
"epub",
|
||||||
"GROQ",
|
"GROQ",
|
||||||
|
@ -171,6 +171,19 @@ GID='1000'
|
|||||||
# WHISPER_PROVIDER="openai"
|
# WHISPER_PROVIDER="openai"
|
||||||
# OPEN_AI_KEY=sk-xxxxxxxx
|
# OPEN_AI_KEY=sk-xxxxxxxx
|
||||||
|
|
||||||
|
###########################################
|
||||||
|
######## TTS/STT Model Selection ##########
|
||||||
|
###########################################
|
||||||
|
# TTS_PROVIDER="native"
|
||||||
|
|
||||||
|
# TTS_PROVIDER="openai"
|
||||||
|
# TTS_OPEN_AI_KEY=sk-example
|
||||||
|
# TTS_OPEN_AI_VOICE_MODEL=nova
|
||||||
|
|
||||||
|
# TTS_PROVIDER="elevenlabs"
|
||||||
|
# TTS_ELEVEN_LABS_KEY=
|
||||||
|
# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
|
||||||
|
|
||||||
# CLOUD DEPLOYMENT VARIRABLES ONLY
|
# CLOUD DEPLOYMENT VARIRABLES ONLY
|
||||||
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||||
# DISABLE_TELEMETRY="false"
|
# DISABLE_TELEMETRY="false"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
"react-dropzone": "^14.2.3",
|
"react-dropzone": "^14.2.3",
|
||||||
"react-loading-skeleton": "^3.1.0",
|
"react-loading-skeleton": "^3.1.0",
|
||||||
"react-router-dom": "^6.3.0",
|
"react-router-dom": "^6.3.0",
|
||||||
|
"react-speech-recognition": "^3.10.0",
|
||||||
"react-tag-input-component": "^2.0.2",
|
"react-tag-input-component": "^2.0.2",
|
||||||
"react-toastify": "^9.1.3",
|
"react-toastify": "^9.1.3",
|
||||||
"react-tooltip": "^5.25.2",
|
"react-tooltip": "^5.25.2",
|
||||||
|
@ -32,6 +32,9 @@ const GeneralLLMPreference = lazy(
|
|||||||
const GeneralTranscriptionPreference = lazy(
|
const GeneralTranscriptionPreference = lazy(
|
||||||
() => import("@/pages/GeneralSettings/TranscriptionPreference")
|
() => import("@/pages/GeneralSettings/TranscriptionPreference")
|
||||||
);
|
);
|
||||||
|
const GeneralAudioPreference = lazy(
|
||||||
|
() => import("@/pages/GeneralSettings/AudioPreference")
|
||||||
|
);
|
||||||
const GeneralEmbeddingPreference = lazy(
|
const GeneralEmbeddingPreference = lazy(
|
||||||
() => import("@/pages/GeneralSettings/EmbeddingPreference")
|
() => import("@/pages/GeneralSettings/EmbeddingPreference")
|
||||||
);
|
);
|
||||||
@ -85,6 +88,10 @@ export default function App() {
|
|||||||
<AdminRoute Component={GeneralTranscriptionPreference} />
|
<AdminRoute Component={GeneralTranscriptionPreference} />
|
||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
|
<Route
|
||||||
|
path="/settings/audio-preference"
|
||||||
|
element={<AdminRoute Component={GeneralAudioPreference} />}
|
||||||
|
/>
|
||||||
<Route
|
<Route
|
||||||
path="/settings/embedding-preference"
|
path="/settings/embedding-preference"
|
||||||
element={<AdminRoute Component={GeneralEmbeddingPreference} />}
|
element={<AdminRoute Component={GeneralEmbeddingPreference} />}
|
||||||
|
@ -21,6 +21,7 @@ import {
|
|||||||
ClosedCaptioning,
|
ClosedCaptioning,
|
||||||
EyeSlash,
|
EyeSlash,
|
||||||
SplitVertical,
|
SplitVertical,
|
||||||
|
Microphone,
|
||||||
} from "@phosphor-icons/react";
|
} from "@phosphor-icons/react";
|
||||||
import useUser from "@/hooks/useUser";
|
import useUser from "@/hooks/useUser";
|
||||||
import { USER_BACKGROUND_COLOR } from "@/utils/constants";
|
import { USER_BACKGROUND_COLOR } from "@/utils/constants";
|
||||||
@ -280,6 +281,14 @@ const SidebarOptions = ({ user = null }) => (
|
|||||||
flex={true}
|
flex={true}
|
||||||
allowedRole={["admin"]}
|
allowedRole={["admin"]}
|
||||||
/>
|
/>
|
||||||
|
<Option
|
||||||
|
href={paths.settings.audioPreference()}
|
||||||
|
btnText="Voice and Speech Support"
|
||||||
|
icon={<Microphone className="h-5 w-5 flex-shrink-0" />}
|
||||||
|
user={user}
|
||||||
|
flex={true}
|
||||||
|
allowedRole={["admin"]}
|
||||||
|
/>
|
||||||
<Option
|
<Option
|
||||||
href={paths.settings.transcriptionPreference()}
|
href={paths.settings.transcriptionPreference()}
|
||||||
btnText="Transcription Model"
|
btnText="Transcription Model"
|
||||||
|
@ -0,0 +1,9 @@
|
|||||||
|
export default function BrowserNative() {
|
||||||
|
return (
|
||||||
|
<div className="w-full h-10 items-center flex">
|
||||||
|
<p className="text-sm font-base text-white text-opacity-60">
|
||||||
|
There is no configuration needed for this provider.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
@ -0,0 +1,9 @@
|
|||||||
|
export default function BrowserNative() {
|
||||||
|
return (
|
||||||
|
<div className="w-full h-10 items-center flex">
|
||||||
|
<p className="text-sm font-base text-white text-opacity-60">
|
||||||
|
There is no configuration needed for this provider.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
107
frontend/src/components/TextToSpeech/ElevenLabsOptions/index.jsx
Normal file
107
frontend/src/components/TextToSpeech/ElevenLabsOptions/index.jsx
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
import { useState, useEffect } from "react";
|
||||||
|
import System from "@/models/system";
|
||||||
|
|
||||||
|
export default function ElevenLabsOptions({ settings }) {
|
||||||
|
const [inputValue, setInputValue] = useState(settings?.TTSElevenLabsKey);
|
||||||
|
const [openAIKey, setOpenAIKey] = useState(settings?.TTSElevenLabsKey);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex gap-x-4">
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
API Key
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
name="TTSElevenLabsKey"
|
||||||
|
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||||
|
placeholder="ElevenLabs API Key"
|
||||||
|
defaultValue={settings?.TTSElevenLabsKey ? "*".repeat(20) : ""}
|
||||||
|
required={true}
|
||||||
|
autoComplete="off"
|
||||||
|
spellCheck={false}
|
||||||
|
onChange={(e) => setInputValue(e.target.value)}
|
||||||
|
onBlur={() => setOpenAIKey(inputValue)}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{!settings?.credentialsOnly && (
|
||||||
|
<ElevenLabsModelSelection settings={settings} apiKey={openAIKey} />
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function ElevenLabsModelSelection({ apiKey, settings }) {
|
||||||
|
const [groupedModels, setGroupedModels] = useState({});
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
async function findCustomModels() {
|
||||||
|
setLoading(true);
|
||||||
|
const { models } = await System.customModels(
|
||||||
|
"elevenlabs-tts",
|
||||||
|
typeof apiKey === "boolean" ? null : apiKey
|
||||||
|
);
|
||||||
|
|
||||||
|
if (models?.length > 0) {
|
||||||
|
const modelsByOrganization = models.reduce((acc, model) => {
|
||||||
|
acc[model.organization] = acc[model.organization] || [];
|
||||||
|
acc[model.organization].push(model);
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
setGroupedModels(modelsByOrganization);
|
||||||
|
}
|
||||||
|
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
findCustomModels();
|
||||||
|
}, [apiKey]);
|
||||||
|
|
||||||
|
if (loading) {
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
Chat Model Selection
|
||||||
|
</label>
|
||||||
|
<select
|
||||||
|
name="TTSElevenLabsVoiceModel"
|
||||||
|
disabled={true}
|
||||||
|
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||||
|
>
|
||||||
|
<option disabled={true} selected={true}>
|
||||||
|
-- loading available models --
|
||||||
|
</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
Chat Model Selection
|
||||||
|
</label>
|
||||||
|
<select
|
||||||
|
name="TTSElevenLabsVoiceModel"
|
||||||
|
required={true}
|
||||||
|
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||||
|
>
|
||||||
|
{Object.keys(groupedModels)
|
||||||
|
.sort()
|
||||||
|
.map((organization) => (
|
||||||
|
<optgroup key={organization} label={organization}>
|
||||||
|
{groupedModels[organization].map((model) => (
|
||||||
|
<option
|
||||||
|
key={model.id}
|
||||||
|
value={model.id}
|
||||||
|
selected={settings?.OpenAiModelPref === model.id}
|
||||||
|
>
|
||||||
|
{model.name}
|
||||||
|
</option>
|
||||||
|
))}
|
||||||
|
</optgroup>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
45
frontend/src/components/TextToSpeech/OpenAiOptions/index.jsx
Normal file
45
frontend/src/components/TextToSpeech/OpenAiOptions/index.jsx
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
/**
 * Convert a string to "Proper Case".
 *
 * Every run that starts with a word character and extends to the next
 * whitespace gets its first character upper-cased and the remainder
 * lower-cased. Characters outside such runs are left untouched.
 *
 * @param {string} string - text to convert.
 * @returns {string} the converted text.
 */
function toProperCase(string) {
  return string.replace(
    /\w\S*/g,
    // `slice` replaces the deprecated `String.prototype.substr`.
    (word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
  );
}
|
||||||
|
|
||||||
|
export default function OpenAiTextToSpeechOptions({ settings }) {
|
||||||
|
const apiKey = settings?.TTSOpenAIKey;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex gap-x-4">
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
API Key
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
name="TTSOpenAIKey"
|
||||||
|
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||||
|
placeholder="OpenAI API Key"
|
||||||
|
defaultValue={apiKey ? "*".repeat(20) : ""}
|
||||||
|
required={true}
|
||||||
|
autoComplete="off"
|
||||||
|
spellCheck={false}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
Voice Model
|
||||||
|
</label>
|
||||||
|
<select
|
||||||
|
name="TTSOpenAIVoiceModel"
|
||||||
|
defaultValue={settings?.TTSOpenAIVoiceModel ?? "alloy"}
|
||||||
|
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||||
|
>
|
||||||
|
{["alloy", "echo", "fable", "onyx", "nova", "shimmer"].map(
|
||||||
|
(voice) => {
|
||||||
|
return <option value={voice}>{toProperCase(voice)}</option>;
|
||||||
|
}
|
||||||
|
)}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
@ -0,0 +1,94 @@
|
|||||||
|
import { useEffect, useState, useRef } from "react";
|
||||||
|
import { SpeakerHigh, PauseCircle, CircleNotch } from "@phosphor-icons/react";
|
||||||
|
import { Tooltip } from "react-tooltip";
|
||||||
|
import Workspace from "@/models/workspace";
|
||||||
|
import showToast from "@/utils/toast";
|
||||||
|
|
||||||
|
export default function AsyncTTSMessage({ slug, chatId }) {
|
||||||
|
const playerRef = useRef(null);
|
||||||
|
const [speaking, setSpeaking] = useState(false);
|
||||||
|
const [loading, setLoading] = useState(false);
|
||||||
|
const [audioSrc, setAudioSrc] = useState(null);
|
||||||
|
|
||||||
|
function speakMessage() {
|
||||||
|
if (speaking) {
|
||||||
|
playerRef?.current?.pause();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!audioSrc) {
|
||||||
|
setLoading(true);
|
||||||
|
Workspace.ttsMessage(slug, chatId)
|
||||||
|
.then((audioBlob) => {
|
||||||
|
if (!audioBlob)
|
||||||
|
throw new Error("Failed to load or play TTS message response.");
|
||||||
|
setAudioSrc(audioBlob);
|
||||||
|
})
|
||||||
|
.catch((e) => showToast(e.message, "error", { clear: true }))
|
||||||
|
.finally(() => setLoading(false));
|
||||||
|
} else {
|
||||||
|
playerRef.current.play();
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e);
|
||||||
|
setLoading(false);
|
||||||
|
setSpeaking(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
function setupPlayer() {
|
||||||
|
if (!playerRef?.current) return;
|
||||||
|
playerRef.current.addEventListener("play", () => {
|
||||||
|
setSpeaking(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
playerRef.current.addEventListener("pause", () => {
|
||||||
|
playerRef.current.currentTime = 0;
|
||||||
|
setSpeaking(false);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
setupPlayer();
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
if (!chatId) return null;
|
||||||
|
return (
|
||||||
|
<div className="mt-3 relative">
|
||||||
|
<button
|
||||||
|
onClick={speakMessage}
|
||||||
|
data-tooltip-id="message-to-speech"
|
||||||
|
data-tooltip-content={
|
||||||
|
speaking ? "Pause TTS speech of message" : "TTS Speak message"
|
||||||
|
}
|
||||||
|
className="border-none text-zinc-300"
|
||||||
|
aria-label={speaking ? "Pause speech" : "Speak message"}
|
||||||
|
>
|
||||||
|
{speaking ? (
|
||||||
|
<PauseCircle size={18} className="mb-1" />
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
{loading ? (
|
||||||
|
<CircleNotch size={18} className="mb-1 animate-spin" />
|
||||||
|
) : (
|
||||||
|
<SpeakerHigh size={18} className="mb-1" />
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
<audio
|
||||||
|
ref={playerRef}
|
||||||
|
hidden={true}
|
||||||
|
src={audioSrc}
|
||||||
|
autoPlay={true}
|
||||||
|
controls={false}
|
||||||
|
/>
|
||||||
|
</button>
|
||||||
|
<Tooltip
|
||||||
|
id="message-to-speech"
|
||||||
|
place="bottom"
|
||||||
|
delayShow={300}
|
||||||
|
className="tooltip !text-xs"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
@ -0,0 +1,23 @@
|
|||||||
|
import { useEffect, useState } from "react";
|
||||||
|
import NativeTTSMessage from "./native";
|
||||||
|
import AsyncTTSMessage from "./asyncTts";
|
||||||
|
import System from "@/models/system";
|
||||||
|
|
||||||
|
export default function TTSMessage({ slug, chatId, message }) {
|
||||||
|
const [provider, setProvider] = useState("native");
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
async function getSettings() {
|
||||||
|
const _settings = await System.keys();
|
||||||
|
setProvider(_settings?.TextToSpeechProvider ?? "native");
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
getSettings();
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
if (loading) return null;
|
||||||
|
if (provider !== "native")
|
||||||
|
return <AsyncTTSMessage slug={slug} chatId={chatId} />;
|
||||||
|
return <NativeTTSMessage message={message} />;
|
||||||
|
}
|
@ -0,0 +1,61 @@
|
|||||||
|
import React, { useEffect, useState } from "react";
|
||||||
|
import { SpeakerHigh, PauseCircle } from "@phosphor-icons/react";
|
||||||
|
import { Tooltip } from "react-tooltip";
|
||||||
|
|
||||||
|
export default function NativeTTSMessage({ message }) {
|
||||||
|
const [speaking, setSpeaking] = useState(false);
|
||||||
|
const [supported, setSupported] = useState(false);
|
||||||
|
useEffect(() => {
|
||||||
|
setSupported("speechSynthesis" in window);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
function endSpeechUtterance() {
|
||||||
|
window.speechSynthesis?.cancel();
|
||||||
|
setSpeaking(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
function speakMessage() {
|
||||||
|
// if the user is pausing this particular message
|
||||||
|
// while the synth is speaking we can end it.
|
||||||
|
// If they are clicking another message's TTS
|
||||||
|
// we need to ignore that until they pause the one that is playing.
|
||||||
|
if (window.speechSynthesis.speaking && speaking) {
|
||||||
|
endSpeechUtterance();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (window.speechSynthesis.speaking && !speaking) return;
|
||||||
|
const utterance = new SpeechSynthesisUtterance(message);
|
||||||
|
utterance.addEventListener("end", endSpeechUtterance);
|
||||||
|
window.speechSynthesis.speak(utterance);
|
||||||
|
setSpeaking(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!supported) return null;
|
||||||
|
return (
|
||||||
|
<div className="mt-3 relative">
|
||||||
|
<button
|
||||||
|
onClick={speakMessage}
|
||||||
|
data-tooltip-id="message-to-speech"
|
||||||
|
data-tooltip-content={
|
||||||
|
speaking ? "Pause TTS speech of message" : "TTS Speak message"
|
||||||
|
}
|
||||||
|
className="border-none text-zinc-300"
|
||||||
|
aria-label={speaking ? "Pause speech" : "Speak message"}
|
||||||
|
>
|
||||||
|
{speaking ? (
|
||||||
|
<PauseCircle size={18} className="mb-1" />
|
||||||
|
) : (
|
||||||
|
<SpeakerHigh size={18} className="mb-1" />
|
||||||
|
)}
|
||||||
|
</button>
|
||||||
|
<Tooltip
|
||||||
|
id="message-to-speech"
|
||||||
|
place="bottom"
|
||||||
|
delayShow={300}
|
||||||
|
className="tooltip !text-xs"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
import React, { memo, useEffect, useState } from "react";
|
import React, { memo, useState } from "react";
|
||||||
import useCopyText from "@/hooks/useCopyText";
|
import useCopyText from "@/hooks/useCopyText";
|
||||||
import {
|
import {
|
||||||
Check,
|
Check,
|
||||||
@ -6,11 +6,10 @@ import {
|
|||||||
ThumbsUp,
|
ThumbsUp,
|
||||||
ThumbsDown,
|
ThumbsDown,
|
||||||
ArrowsClockwise,
|
ArrowsClockwise,
|
||||||
SpeakerHigh,
|
|
||||||
PauseCircle,
|
|
||||||
} from "@phosphor-icons/react";
|
} from "@phosphor-icons/react";
|
||||||
import { Tooltip } from "react-tooltip";
|
import { Tooltip } from "react-tooltip";
|
||||||
import Workspace from "@/models/workspace";
|
import Workspace from "@/models/workspace";
|
||||||
|
import TTSMessage from "./TTSButton";
|
||||||
|
|
||||||
const Actions = ({
|
const Actions = ({
|
||||||
message,
|
message,
|
||||||
@ -60,7 +59,7 @@ const Actions = ({
|
|||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
<TTSMessage message={message} />
|
<TTSMessage slug={slug} chatId={chatId} message={message} />
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
@ -149,62 +148,4 @@ function RegenerateMessage({ regenerateMessage, chatId }) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function TTSMessage({ message }) {
|
|
||||||
const [speaking, setSpeaking] = useState(false);
|
|
||||||
const [supported, setSupported] = useState(false);
|
|
||||||
useEffect(() => {
|
|
||||||
setSupported("speechSynthesis" in window);
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
function endSpeechUtterance() {
|
|
||||||
window.speechSynthesis?.cancel();
|
|
||||||
setSpeaking(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
function speakMessage() {
|
|
||||||
// if the user is pausing this particular message
|
|
||||||
// while the synth if speaking we can end it.
|
|
||||||
// If they are clicking another message's TTS
|
|
||||||
// we need to ignore that until they pause the one that is playing.
|
|
||||||
if (window.speechSynthesis.speaking && speaking) {
|
|
||||||
endSpeechUtterance();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (window.speechSynthesis.speaking && !speaking) return;
|
|
||||||
const utterance = new SpeechSynthesisUtterance(message);
|
|
||||||
utterance.addEventListener("end", endSpeechUtterance);
|
|
||||||
window.speechSynthesis.speak(utterance);
|
|
||||||
setSpeaking(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!supported) return null;
|
|
||||||
return (
|
|
||||||
<div className="mt-3 relative">
|
|
||||||
<button
|
|
||||||
onClick={speakMessage}
|
|
||||||
data-tooltip-id="message-to-speech"
|
|
||||||
data-tooltip-content={
|
|
||||||
speaking ? "Pause TTS speech of message" : "TTS Speak message"
|
|
||||||
}
|
|
||||||
className="border-none text-zinc-300"
|
|
||||||
aria-label={speaking ? "Pause speech" : "Speak message"}
|
|
||||||
>
|
|
||||||
{speaking ? (
|
|
||||||
<PauseCircle size={18} className="mb-1" />
|
|
||||||
) : (
|
|
||||||
<SpeakerHigh size={18} className="mb-1" />
|
|
||||||
)}
|
|
||||||
</button>
|
|
||||||
<Tooltip
|
|
||||||
id="message-to-speech"
|
|
||||||
place="bottom"
|
|
||||||
delayShow={300}
|
|
||||||
className="tooltip !text-xs"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
export default memo(Actions);
|
export default memo(Actions);
|
||||||
|
@ -0,0 +1,82 @@
|
|||||||
|
import { useEffect } from "react";
|
||||||
|
import { Microphone } from "@phosphor-icons/react";
|
||||||
|
import { Tooltip } from "react-tooltip";
|
||||||
|
import _regeneratorRuntime from "regenerator-runtime";
|
||||||
|
import SpeechRecognition, {
|
||||||
|
useSpeechRecognition,
|
||||||
|
} from "react-speech-recognition";
|
||||||
|
|
||||||
|
let timeout;
|
||||||
|
const SILENCE_INTERVAL = 3_200; // wait in seconds of silence before closing.
|
||||||
|
export default function SpeechToText({ sendCommand }) {
|
||||||
|
const {
|
||||||
|
transcript,
|
||||||
|
listening,
|
||||||
|
resetTranscript,
|
||||||
|
browserSupportsSpeechRecognition,
|
||||||
|
browserSupportsContinuousListening,
|
||||||
|
isMicrophoneAvailable,
|
||||||
|
} = useSpeechRecognition({
|
||||||
|
clearTranscriptOnListen: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
function startSTTSession() {
|
||||||
|
if (!isMicrophoneAvailable) {
|
||||||
|
alert(
|
||||||
|
"AnythingLLM does not have access to microphone. Please enable for this site to use this feature."
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
resetTranscript();
|
||||||
|
SpeechRecognition.startListening({
|
||||||
|
continuous: browserSupportsContinuousListening,
|
||||||
|
language: window?.navigator?.language ?? "en-US",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function endTTSSession() {
|
||||||
|
SpeechRecognition.stopListening();
|
||||||
|
if (transcript.length > 0) {
|
||||||
|
sendCommand(transcript, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
resetTranscript();
|
||||||
|
clearTimeout(timeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (transcript?.length > 0) {
|
||||||
|
sendCommand(transcript, false);
|
||||||
|
clearTimeout(timeout);
|
||||||
|
timeout = setTimeout(() => {
|
||||||
|
endTTSSession();
|
||||||
|
}, SILENCE_INTERVAL);
|
||||||
|
}
|
||||||
|
}, [transcript]);
|
||||||
|
|
||||||
|
if (!browserSupportsSpeechRecognition) return null;
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
id="text-size-btn"
|
||||||
|
data-tooltip-id="tooltip-text-size-btn"
|
||||||
|
data-tooltip-content="Speak your prompt"
|
||||||
|
aria-label="Speak your prompt"
|
||||||
|
onClick={listening ? endTTSSession : startSTTSession}
|
||||||
|
className={`relative flex justify-center items-center opacity-60 hover:opacity-100 cursor-pointer ${
|
||||||
|
!!listening ? "!opacity-100" : ""
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<Microphone
|
||||||
|
weight="fill"
|
||||||
|
className="w-6 h-6 pointer-events-none text-white"
|
||||||
|
/>
|
||||||
|
<Tooltip
|
||||||
|
id="tooltip-text-size-btn"
|
||||||
|
place="top"
|
||||||
|
delayShow={300}
|
||||||
|
className="tooltip !text-xs z-99"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
@ -12,6 +12,7 @@ import AvailableAgentsButton, {
|
|||||||
useAvailableAgents,
|
useAvailableAgents,
|
||||||
} from "./AgentMenu";
|
} from "./AgentMenu";
|
||||||
import TextSizeButton from "./TextSizeMenu";
|
import TextSizeButton from "./TextSizeMenu";
|
||||||
|
import SpeechToText from "./SpeechToText";
|
||||||
|
|
||||||
export const PROMPT_INPUT_EVENT = "set_prompt_input";
|
export const PROMPT_INPUT_EVENT = "set_prompt_input";
|
||||||
export default function PromptInput({
|
export default function PromptInput({
|
||||||
@ -34,6 +35,7 @@ export default function PromptInput({
|
|||||||
function handlePromptUpdate(e) {
|
function handlePromptUpdate(e) {
|
||||||
setPromptInput(e?.detail ?? "");
|
setPromptInput(e?.detail ?? "");
|
||||||
}
|
}
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!!window)
|
if (!!window)
|
||||||
window.addEventListener(PROMPT_INPUT_EVENT, handlePromptUpdate);
|
window.addEventListener(PROMPT_INPUT_EVENT, handlePromptUpdate);
|
||||||
@ -156,6 +158,9 @@ export default function PromptInput({
|
|||||||
/>
|
/>
|
||||||
<TextSizeButton />
|
<TextSizeButton />
|
||||||
</div>
|
</div>
|
||||||
|
<div className="flex gap-x-2">
|
||||||
|
<SpeechToText sendCommand={sendCommand} />
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
BIN
frontend/src/media/ttsproviders/elevenlabs.png
Normal file
BIN
frontend/src/media/ttsproviders/elevenlabs.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.3 KiB |
@ -332,7 +332,7 @@ const System = {
|
|||||||
})
|
})
|
||||||
.then((blob) => (blob ? URL.createObjectURL(blob) : null))
|
.then((blob) => (blob ? URL.createObjectURL(blob) : null))
|
||||||
.catch((e) => {
|
.catch((e) => {
|
||||||
console.log(e);
|
// console.log(e);
|
||||||
return null;
|
return null;
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
@ -272,6 +272,21 @@ const Workspace = {
|
|||||||
return false;
|
return false;
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
ttsMessage: async function (slug, chatId) {
|
||||||
|
return await fetch(`${API_BASE}/workspace/${slug}/tts/${chatId}`, {
|
||||||
|
method: "GET",
|
||||||
|
cache: "no-cache",
|
||||||
|
headers: baseHeaders(),
|
||||||
|
})
|
||||||
|
.then((res) => {
|
||||||
|
if (res.ok && res.status !== 204) return res.blob();
|
||||||
|
throw new Error("Failed to fetch TTS.");
|
||||||
|
})
|
||||||
|
.then((blob) => (blob ? URL.createObjectURL(blob) : null))
|
||||||
|
.catch((e) => {
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
},
|
||||||
threads: WorkspaceThread,
|
threads: WorkspaceThread,
|
||||||
|
|
||||||
uploadPfp: async function (formData, slug) {
|
uploadPfp: async function (formData, slug) {
|
||||||
@ -302,7 +317,7 @@ const Workspace = {
|
|||||||
})
|
})
|
||||||
.then((blob) => (blob ? URL.createObjectURL(blob) : null))
|
.then((blob) => (blob ? URL.createObjectURL(blob) : null))
|
||||||
.catch((e) => {
|
.catch((e) => {
|
||||||
console.log(e);
|
// console.log(e);
|
||||||
return null;
|
return null;
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
45
frontend/src/pages/GeneralSettings/AudioPreference/index.jsx
Normal file
45
frontend/src/pages/GeneralSettings/AudioPreference/index.jsx
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import React, { useEffect, useState, useRef } from "react";
|
||||||
|
import { isMobile } from "react-device-detect";
|
||||||
|
import Sidebar from "@/components/SettingsSidebar";
|
||||||
|
import System from "@/models/system";
|
||||||
|
import PreLoader from "@/components/Preloader";
|
||||||
|
import SpeechToTextProvider from "./stt";
|
||||||
|
import TextToSpeechProvider from "./tts";
|
||||||
|
|
||||||
|
export default function AudioPreference() {
|
||||||
|
const [settings, setSettings] = useState(null);
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
async function fetchKeys() {
|
||||||
|
const _settings = await System.keys();
|
||||||
|
setSettings(_settings);
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
fetchKeys();
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="w-screen h-screen overflow-hidden bg-sidebar flex">
|
||||||
|
<Sidebar />
|
||||||
|
{loading ? (
|
||||||
|
<div
|
||||||
|
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
|
||||||
|
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
|
||||||
|
>
|
||||||
|
<div className="w-full h-full flex justify-center items-center">
|
||||||
|
<PreLoader />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div
|
||||||
|
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
|
||||||
|
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
|
||||||
|
>
|
||||||
|
<SpeechToTextProvider settings={settings} />
|
||||||
|
<TextToSpeechProvider settings={settings} />
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
191
frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx
Normal file
191
frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
import React, { useEffect, useState, useRef } from "react";
|
||||||
|
import System from "@/models/system";
|
||||||
|
import showToast from "@/utils/toast";
|
||||||
|
import LLMItem from "@/components/LLMSelection/LLMItem";
|
||||||
|
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||||
|
import CTAButton from "@/components/lib/CTAButton";
|
||||||
|
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
|
||||||
|
import BrowserNative from "@/components/SpeechToText/BrowserNative";
|
||||||
|
|
||||||
|
const PROVIDERS = [
|
||||||
|
{
|
||||||
|
name: "System native",
|
||||||
|
value: "native",
|
||||||
|
logo: AnythingLLMIcon,
|
||||||
|
options: (settings) => <BrowserNative settings={settings} />,
|
||||||
|
description: "Uses your browser's built in STT service if supported.",
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
export default function SpeechToTextProvider({ settings }) {
|
||||||
|
const [saving, setSaving] = useState(false);
|
||||||
|
const [hasChanges, setHasChanges] = useState(false);
|
||||||
|
const [searchQuery, setSearchQuery] = useState("");
|
||||||
|
const [filteredProviders, setFilteredProviders] = useState([]);
|
||||||
|
const [selectedProvider, setSelectedProvider] = useState(
|
||||||
|
settings?.SpeechToTextProvider || "native"
|
||||||
|
);
|
||||||
|
const [searchMenuOpen, setSearchMenuOpen] = useState(false);
|
||||||
|
const searchInputRef = useRef(null);
|
||||||
|
|
||||||
|
const handleSubmit = async (e) => {
|
||||||
|
e.preventDefault();
|
||||||
|
const form = e.target;
|
||||||
|
const data = { SpeechToTextProvider: selectedProvider };
|
||||||
|
const formData = new FormData(form);
|
||||||
|
|
||||||
|
for (var [key, value] of formData.entries()) data[key] = value;
|
||||||
|
const { error } = await System.updateSystem(data);
|
||||||
|
setSaving(true);
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
showToast(`Failed to save preferences: ${error}`, "error");
|
||||||
|
} else {
|
||||||
|
showToast("Speech-to-text preferences saved successfully.", "success");
|
||||||
|
}
|
||||||
|
setSaving(false);
|
||||||
|
setHasChanges(!!error);
|
||||||
|
};
|
||||||
|
|
||||||
|
const updateProviderChoice = (selection) => {
|
||||||
|
setSearchQuery("");
|
||||||
|
setSelectedProvider(selection);
|
||||||
|
setSearchMenuOpen(false);
|
||||||
|
setHasChanges(true);
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleXButton = () => {
|
||||||
|
if (searchQuery.length > 0) {
|
||||||
|
setSearchQuery("");
|
||||||
|
if (searchInputRef.current) searchInputRef.current.value = "";
|
||||||
|
} else {
|
||||||
|
setSearchMenuOpen(!searchMenuOpen);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const filtered = PROVIDERS.filter((provider) =>
|
||||||
|
provider.name.toLowerCase().includes(searchQuery.toLowerCase())
|
||||||
|
);
|
||||||
|
setFilteredProviders(filtered);
|
||||||
|
}, [searchQuery, selectedProvider]);
|
||||||
|
|
||||||
|
const selectedProviderObject = PROVIDERS.find(
|
||||||
|
(provider) => provider.value === selectedProvider
|
||||||
|
);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<form onSubmit={handleSubmit} className="flex w-full">
|
||||||
|
<div className="flex flex-col w-full px-1 md:pl-6 md:pr-[50px] md:py-6 py-16">
|
||||||
|
<div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
|
||||||
|
<div className="flex gap-x-4 items-center">
|
||||||
|
<p className="text-lg leading-6 font-bold text-white">
|
||||||
|
Speech-to-text Preference
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<p className="text-xs leading-[18px] font-base text-white text-opacity-60">
|
||||||
|
Here you can specify what kind of text-to-speech and speech-to-text
|
||||||
|
providers you would want to use in your AnythingLLM experience. By
|
||||||
|
default, we use the browser's built in support for these services,
|
||||||
|
but you may want to use others.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div className="w-full justify-end flex">
|
||||||
|
{hasChanges && (
|
||||||
|
<CTAButton
|
||||||
|
onClick={() => handleSubmit()}
|
||||||
|
className="mt-3 mr-0 -mb-14 z-10"
|
||||||
|
>
|
||||||
|
{saving ? "Saving..." : "Save changes"}
|
||||||
|
</CTAButton>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className="text-base font-bold text-white mt-6 mb-4">Provider</div>
|
||||||
|
<div className="relative">
|
||||||
|
{searchMenuOpen && (
|
||||||
|
<div
|
||||||
|
className="fixed top-0 left-0 w-full h-full bg-black bg-opacity-70 backdrop-blur-sm z-10"
|
||||||
|
onClick={() => setSearchMenuOpen(false)}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
{searchMenuOpen ? (
|
||||||
|
<div className="absolute top-0 left-0 w-full max-w-[640px] max-h-[310px] overflow-auto white-scrollbar min-h-[64px] bg-[#18181B] rounded-lg flex flex-col justify-between cursor-pointer border-2 border-[#46C8FF] z-20">
|
||||||
|
<div className="w-full flex flex-col gap-y-1">
|
||||||
|
<div className="flex items-center sticky top-0 border-b border-[#9CA3AF] mx-4 bg-[#18181B]">
|
||||||
|
<MagnifyingGlass
|
||||||
|
size={20}
|
||||||
|
weight="bold"
|
||||||
|
className="absolute left-4 z-30 text-white -ml-4 my-2"
|
||||||
|
/>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
name="stt-provider-search"
|
||||||
|
autoComplete="off"
|
||||||
|
placeholder="Search speech to text providers"
|
||||||
|
className="-ml-4 my-2 bg-transparent z-20 pl-12 h-[38px] w-full px-4 py-1 text-sm outline-none focus:border-white text-white placeholder:text-white placeholder:font-medium"
|
||||||
|
onChange={(e) => setSearchQuery(e.target.value)}
|
||||||
|
ref={searchInputRef}
|
||||||
|
onKeyDown={(e) => {
|
||||||
|
if (e.key === "Enter") e.preventDefault();
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<X
|
||||||
|
size={20}
|
||||||
|
weight="bold"
|
||||||
|
className="cursor-pointer text-white hover:text-[#9CA3AF]"
|
||||||
|
onClick={handleXButton}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 pl-4 pr-2 flex flex-col gap-y-1 overflow-y-auto white-scrollbar pb-4">
|
||||||
|
{filteredProviders.map((provider) => (
|
||||||
|
<LLMItem
|
||||||
|
key={provider.name}
|
||||||
|
name={provider.name}
|
||||||
|
value={provider.value}
|
||||||
|
image={provider.logo}
|
||||||
|
description={provider.description}
|
||||||
|
checked={selectedProvider === provider.value}
|
||||||
|
onClick={() => updateProviderChoice(provider.value)}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<button
|
||||||
|
className="w-full max-w-[640px] h-[64px] bg-[#18181B] rounded-lg flex items-center p-[14px] justify-between cursor-pointer border-2 border-transparent hover:border-[#46C8FF] transition-all duration-300"
|
||||||
|
type="button"
|
||||||
|
onClick={() => setSearchMenuOpen(true)}
|
||||||
|
>
|
||||||
|
<div className="flex gap-x-4 items-center">
|
||||||
|
<img
|
||||||
|
src={selectedProviderObject.logo}
|
||||||
|
alt={`${selectedProviderObject.name} logo`}
|
||||||
|
className="w-10 h-10 rounded-md"
|
||||||
|
/>
|
||||||
|
<div className="flex flex-col text-left">
|
||||||
|
<div className="text-sm font-semibold text-white">
|
||||||
|
{selectedProviderObject.name}
|
||||||
|
</div>
|
||||||
|
<div className="mt-1 text-xs text-[#D2D5DB]">
|
||||||
|
{selectedProviderObject.description}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<CaretUpDown size={24} weight="bold" className="text-white" />
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
onChange={() => setHasChanges(true)}
|
||||||
|
className="mt-4 flex flex-col gap-y-1"
|
||||||
|
>
|
||||||
|
{selectedProvider &&
|
||||||
|
PROVIDERS.find(
|
||||||
|
(provider) => provider.value === selectedProvider
|
||||||
|
)?.options(settings)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
);
|
||||||
|
}
|
209
frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
Normal file
209
frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
import React, { useEffect, useState, useRef } from "react";
|
||||||
|
import System from "@/models/system";
|
||||||
|
import showToast from "@/utils/toast";
|
||||||
|
import LLMItem from "@/components/LLMSelection/LLMItem";
|
||||||
|
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||||
|
import CTAButton from "@/components/lib/CTAButton";
|
||||||
|
import OpenAiLogo from "@/media/llmprovider/openai.png";
|
||||||
|
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
|
||||||
|
import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
|
||||||
|
import BrowserNative from "@/components/TextToSpeech/BrowserNative";
|
||||||
|
import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
|
||||||
|
import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
|
||||||
|
|
||||||
|
const PROVIDERS = [
|
||||||
|
{
|
||||||
|
name: "System native",
|
||||||
|
value: "native",
|
||||||
|
logo: AnythingLLMIcon,
|
||||||
|
options: (settings) => <BrowserNative settings={settings} />,
|
||||||
|
description: "Uses your browser's built in TTS service if supported.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "OpenAI",
|
||||||
|
value: "openai",
|
||||||
|
logo: OpenAiLogo,
|
||||||
|
options: (settings) => <OpenAiTTSOptions settings={settings} />,
|
||||||
|
description: "Use OpenAI's text to speech voices.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ElevenLabs",
|
||||||
|
value: "elevenlabs",
|
||||||
|
logo: ElevenLabsIcon,
|
||||||
|
options: (settings) => <ElevenLabsTTSOptions settings={settings} />,
|
||||||
|
description: "Use ElevenLabs's text to speech voices and technology.",
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
export default function TextToSpeechProvider({ settings }) {
|
||||||
|
const [saving, setSaving] = useState(false);
|
||||||
|
const [hasChanges, setHasChanges] = useState(false);
|
||||||
|
const [searchQuery, setSearchQuery] = useState("");
|
||||||
|
const [filteredProviders, setFilteredProviders] = useState([]);
|
||||||
|
const [selectedProvider, setSelectedProvider] = useState(
|
||||||
|
settings?.TextToSpeechProvider || "native"
|
||||||
|
);
|
||||||
|
const [searchMenuOpen, setSearchMenuOpen] = useState(false);
|
||||||
|
const searchInputRef = useRef(null);
|
||||||
|
|
||||||
|
const handleSubmit = async (e) => {
|
||||||
|
e.preventDefault();
|
||||||
|
const form = e.target;
|
||||||
|
const data = { TextToSpeechProvider: selectedProvider };
|
||||||
|
const formData = new FormData(form);
|
||||||
|
|
||||||
|
for (var [key, value] of formData.entries()) data[key] = value;
|
||||||
|
const { error } = await System.updateSystem(data);
|
||||||
|
setSaving(true);
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
showToast(`Failed to save preferences: ${error}`, "error");
|
||||||
|
} else {
|
||||||
|
showToast("Text-to-speech preferences saved successfully.", "success");
|
||||||
|
}
|
||||||
|
setSaving(false);
|
||||||
|
setHasChanges(!!error);
|
||||||
|
};
|
||||||
|
|
||||||
|
const updateProviderChoice = (selection) => {
|
||||||
|
setSearchQuery("");
|
||||||
|
setSelectedProvider(selection);
|
||||||
|
setSearchMenuOpen(false);
|
||||||
|
setHasChanges(true);
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleXButton = () => {
|
||||||
|
if (searchQuery.length > 0) {
|
||||||
|
setSearchQuery("");
|
||||||
|
if (searchInputRef.current) searchInputRef.current.value = "";
|
||||||
|
} else {
|
||||||
|
setSearchMenuOpen(!searchMenuOpen);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const filtered = PROVIDERS.filter((provider) =>
|
||||||
|
provider.name.toLowerCase().includes(searchQuery.toLowerCase())
|
||||||
|
);
|
||||||
|
setFilteredProviders(filtered);
|
||||||
|
}, [searchQuery, selectedProvider]);
|
||||||
|
|
||||||
|
const selectedProviderObject = PROVIDERS.find(
|
||||||
|
(provider) => provider.value === selectedProvider
|
||||||
|
);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<form onSubmit={handleSubmit} className="flex w-full">
|
||||||
|
<div className="flex flex-col w-full px-1 md:pl-6 md:pr-[50px] md:py-6 py-16">
|
||||||
|
<div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
|
||||||
|
<div className="flex gap-x-4 items-center">
|
||||||
|
<p className="text-lg leading-6 font-bold text-white">
|
||||||
|
Text-to-speech Preference
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<p className="text-xs leading-[18px] font-base text-white text-opacity-60">
|
||||||
|
Here you can specify what kind of text-to-speech providers you would
|
||||||
|
want to use in your AnythingLLM experience. By default, we use the
|
||||||
|
browser's built in support for these services, but you may want to
|
||||||
|
use others.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div className="w-full justify-end flex">
|
||||||
|
{hasChanges && (
|
||||||
|
<CTAButton
|
||||||
|
onClick={() => handleSubmit()}
|
||||||
|
className="mt-3 mr-0 -mb-14 z-10"
|
||||||
|
>
|
||||||
|
{saving ? "Saving..." : "Save changes"}
|
||||||
|
</CTAButton>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className="text-base font-bold text-white mt-6 mb-4">Provider</div>
|
||||||
|
<div className="relative">
|
||||||
|
{searchMenuOpen && (
|
||||||
|
<div
|
||||||
|
className="fixed top-0 left-0 w-full h-full bg-black bg-opacity-70 backdrop-blur-sm z-10"
|
||||||
|
onClick={() => setSearchMenuOpen(false)}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
{searchMenuOpen ? (
|
||||||
|
<div className="absolute top-0 left-0 w-full max-w-[640px] max-h-[310px] overflow-auto white-scrollbar min-h-[64px] bg-[#18181B] rounded-lg flex flex-col justify-between cursor-pointer border-2 border-[#46C8FF] z-20">
|
||||||
|
<div className="w-full flex flex-col gap-y-1">
|
||||||
|
<div className="flex items-center sticky top-0 border-b border-[#9CA3AF] mx-4 bg-[#18181B]">
|
||||||
|
<MagnifyingGlass
|
||||||
|
size={20}
|
||||||
|
weight="bold"
|
||||||
|
className="absolute left-4 z-30 text-white -ml-4 my-2"
|
||||||
|
/>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
name="tts-provider-search"
|
||||||
|
autoComplete="off"
|
||||||
|
placeholder="Search text to speech providers"
|
||||||
|
className="-ml-4 my-2 bg-transparent z-20 pl-12 h-[38px] w-full px-4 py-1 text-sm outline-none focus:border-white text-white placeholder:text-white placeholder:font-medium"
|
||||||
|
onChange={(e) => setSearchQuery(e.target.value)}
|
||||||
|
ref={searchInputRef}
|
||||||
|
onKeyDown={(e) => {
|
||||||
|
if (e.key === "Enter") e.preventDefault();
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<X
|
||||||
|
size={20}
|
||||||
|
weight="bold"
|
||||||
|
className="cursor-pointer text-white hover:text-[#9CA3AF]"
|
||||||
|
onClick={handleXButton}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 pl-4 pr-2 flex flex-col gap-y-1 overflow-y-auto white-scrollbar pb-4">
|
||||||
|
{filteredProviders.map((provider) => (
|
||||||
|
<LLMItem
|
||||||
|
key={provider.name}
|
||||||
|
name={provider.name}
|
||||||
|
value={provider.value}
|
||||||
|
image={provider.logo}
|
||||||
|
description={provider.description}
|
||||||
|
checked={selectedProvider === provider.value}
|
||||||
|
onClick={() => updateProviderChoice(provider.value)}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<button
|
||||||
|
className="w-full max-w-[640px] h-[64px] bg-[#18181B] rounded-lg flex items-center p-[14px] justify-between cursor-pointer border-2 border-transparent hover:border-[#46C8FF] transition-all duration-300"
|
||||||
|
type="button"
|
||||||
|
onClick={() => setSearchMenuOpen(true)}
|
||||||
|
>
|
||||||
|
<div className="flex gap-x-4 items-center">
|
||||||
|
<img
|
||||||
|
src={selectedProviderObject.logo}
|
||||||
|
alt={`${selectedProviderObject.name} logo`}
|
||||||
|
className="w-10 h-10 rounded-md"
|
||||||
|
/>
|
||||||
|
<div className="flex flex-col text-left">
|
||||||
|
<div className="text-sm font-semibold text-white">
|
||||||
|
{selectedProviderObject.name}
|
||||||
|
</div>
|
||||||
|
<div className="mt-1 text-xs text-[#D2D5DB]">
|
||||||
|
{selectedProviderObject.description}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<CaretUpDown size={24} weight="bold" className="text-white" />
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
onChange={() => setHasChanges(true)}
|
||||||
|
className="mt-4 flex flex-col gap-y-1"
|
||||||
|
>
|
||||||
|
{selectedProvider &&
|
||||||
|
PROVIDERS.find(
|
||||||
|
(provider) => provider.value === selectedProvider
|
||||||
|
)?.options(settings)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
);
|
||||||
|
}
|
@ -98,6 +98,9 @@ export default {
|
|||||||
transcriptionPreference: () => {
|
transcriptionPreference: () => {
|
||||||
return "/settings/transcription-preference";
|
return "/settings/transcription-preference";
|
||||||
},
|
},
|
||||||
|
audioPreference: () => {
|
||||||
|
return "/settings/audio-preference";
|
||||||
|
},
|
||||||
embedder: {
|
embedder: {
|
||||||
modelPreference: () => "/settings/embedding-preference",
|
modelPreference: () => "/settings/embedding-preference",
|
||||||
chunkingPreference: () => "/settings/text-splitter-preference",
|
chunkingPreference: () => "/settings/text-splitter-preference",
|
||||||
|
@ -2841,6 +2841,11 @@ react-smooth@^4.0.0:
|
|||||||
prop-types "^15.8.1"
|
prop-types "^15.8.1"
|
||||||
react-transition-group "^4.4.5"
|
react-transition-group "^4.4.5"
|
||||||
|
|
||||||
|
react-speech-recognition@^3.10.0:
|
||||||
|
version "3.10.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/react-speech-recognition/-/react-speech-recognition-3.10.0.tgz#7aa43bb28d78b92671864dabba3a70489ccad27b"
|
||||||
|
integrity sha512-EVSr4Ik8l9urwdPiK2r0+ADrLyDDrjB0qBRdUWO+w2MfwEBrj6NuRmy1GD3x7BU/V6/hab0pl8Lupen0zwlJyw==
|
||||||
|
|
||||||
react-tag-input-component@^2.0.2:
|
react-tag-input-component@^2.0.2:
|
||||||
version "2.0.2"
|
version "2.0.2"
|
||||||
resolved "https://registry.yarnpkg.com/react-tag-input-component/-/react-tag-input-component-2.0.2.tgz#f62f013c6a535141dd1c6c3a88858223170150f1"
|
resolved "https://registry.yarnpkg.com/react-tag-input-component/-/react-tag-input-component-2.0.2.tgz#f62f013c6a535141dd1c6c3a88858223170150f1"
|
||||||
|
@ -168,6 +168,19 @@ WHISPER_PROVIDER="local"
|
|||||||
# WHISPER_PROVIDER="openai"
|
# WHISPER_PROVIDER="openai"
|
||||||
# OPEN_AI_KEY=sk-xxxxxxxx
|
# OPEN_AI_KEY=sk-xxxxxxxx
|
||||||
|
|
||||||
|
###########################################
|
||||||
|
######## TTS/STT Model Selection ##########
|
||||||
|
###########################################
|
||||||
|
TTS_PROVIDER="native"
|
||||||
|
|
||||||
|
# TTS_PROVIDER="openai"
|
||||||
|
# TTS_OPEN_AI_KEY=sk-example
|
||||||
|
# TTS_OPEN_AI_VOICE_MODEL=nova
|
||||||
|
|
||||||
|
# TTS_PROVIDER="elevenlabs"
|
||||||
|
# TTS_ELEVEN_LABS_KEY=
|
||||||
|
# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel
|
||||||
|
|
||||||
# CLOUD DEPLOYMENT VARIABLES ONLY
|
# CLOUD DEPLOYMENT VARIABLES ONLY
|
||||||
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||||
# STORAGE_DIR= # absolute filesystem path with no trailing slash
|
# STORAGE_DIR= # absolute filesystem path with no trailing slash
|
||||||
|
@ -1,6 +1,11 @@
|
|||||||
const path = require("path");
|
const path = require("path");
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const { reqBody, multiUserMode, userFromSession } = require("../utils/http");
|
const {
|
||||||
|
reqBody,
|
||||||
|
multiUserMode,
|
||||||
|
userFromSession,
|
||||||
|
safeJsonParse,
|
||||||
|
} = require("../utils/http");
|
||||||
const { normalizePath } = require("../utils/files");
|
const { normalizePath } = require("../utils/files");
|
||||||
const { Workspace } = require("../models/workspace");
|
const { Workspace } = require("../models/workspace");
|
||||||
const { Document } = require("../models/documents");
|
const { Document } = require("../models/documents");
|
||||||
@ -25,6 +30,7 @@ const {
|
|||||||
determineWorkspacePfpFilepath,
|
determineWorkspacePfpFilepath,
|
||||||
fetchPfp,
|
fetchPfp,
|
||||||
} = require("../utils/files/pfp");
|
} = require("../utils/files/pfp");
|
||||||
|
const { getTTSProvider } = require("../utils/TextToSpeech");
|
||||||
|
|
||||||
function workspaceEndpoints(app) {
|
function workspaceEndpoints(app) {
|
||||||
if (!app) return;
|
if (!app) return;
|
||||||
@ -506,6 +512,48 @@ function workspaceEndpoints(app) {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// GET /workspace/:slug/tts/:chatId
// Returns synthesized speech ("audio/mpeg") for the text of one stored chat
// response in the workspace. Audio is kept in responseCache under
// "<workspace slug>:<chatId>" so repeat requests skip the TTS provider.
// Responds 404 when the chat does not exist in this workspace, 204 when
// there is nothing to speak or the provider produced no audio, and 500 on
// unexpected failures.
app.get(
  "/workspace/:slug/tts/:chatId",
  [validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
  async function (request, response) {
    try {
      const { chatId } = request.params;
      const workspace = response.locals.workspace;
      const cacheKey = `${workspace.slug}:${chatId}`;
      const wsChat = await WorkspaceChats.get({
        id: Number(chatId),
        workspaceId: workspace.id,
      });

      const cachedResponse = responseCache.get(cacheKey);
      if (cachedResponse) {
        response.writeHead(200, {
          "Content-Type": cachedResponse.mime || "audio/mpeg",
        });
        response.end(cachedResponse.buffer);
        return;
      }

      // No such chat in this workspace: answer explicitly rather than
      // dereferencing a missing record and surfacing it as a 500.
      if (!wsChat) return response.sendStatus(404);

      const text = safeJsonParse(wsChat.response, null)?.text;
      // sendStatus() already finishes the response; no extra end() needed.
      if (!text) return response.sendStatus(204);

      const TTSProvider = getTTSProvider();
      const buffer = await TTSProvider.ttsBuffer(text);
      // Providers return null when synthesis failed.
      if (buffer === null) return response.sendStatus(204);

      responseCache.set(cacheKey, { buffer, mime: "audio/mpeg" });
      response.writeHead(200, {
        "Content-Type": "audio/mpeg",
      });
      response.end(buffer);
      return;
    } catch (error) {
      console.error("Error processing the TTS request:", error);
      response.status(500).json({ message: "TTS could not be completed" });
    }
  }
);
|
||||||
|
|
||||||
app.get(
|
app.get(
|
||||||
"/workspace/:slug/pfp",
|
"/workspace/:slug/pfp",
|
||||||
[validatedRequest, flexUserRoleValid([ROLES.all])],
|
[validatedRequest, flexUserRoleValid([ROLES.all])],
|
||||||
|
@ -131,6 +131,17 @@ const SystemSettings = {
|
|||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
WhisperProvider: process.env.WHISPER_PROVIDER || "local",
|
WhisperProvider: process.env.WHISPER_PROVIDER || "local",
|
||||||
|
|
||||||
|
// --------------------------------------------------------
|
||||||
|
// TTS/STT Selection Settings & Configs
|
||||||
|
// - Third-party providers are OpenAI and ElevenLabs; the default is the browser's native built-in TTS
|
||||||
|
// --------------------------------------------------------
|
||||||
|
TextToSpeechProvider: process.env.TTS_PROVIDER || "native",
|
||||||
|
TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY,
|
||||||
|
TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL,
|
||||||
|
// Eleven Labs TTS
|
||||||
|
TTSElevenLabsKey: !!process.env.TTS_ELEVEN_LABS_KEY,
|
||||||
|
TTSElevenLabsVoiceModel: process.env.TTS_ELEVEN_LABS_VOICE_MODEL,
|
||||||
|
|
||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
// Agent Settings & Configs
|
// Agent Settings & Configs
|
||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
|
@ -44,6 +44,7 @@
|
|||||||
"cohere-ai": "^7.9.5",
|
"cohere-ai": "^7.9.5",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"dotenv": "^16.0.3",
|
"dotenv": "^16.0.3",
|
||||||
|
"elevenlabs": "^0.5.0",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2",
|
||||||
"express-ws": "^5.0.2",
|
"express-ws": "^5.0.2",
|
||||||
"extract-json-from-string": "^1.0.1",
|
"extract-json-from-string": "^1.0.1",
|
||||||
|
54
server/utils/TextToSpeech/elevenLabs/index.js
Normal file
54
server/utils/TextToSpeech/elevenLabs/index.js
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
const { ElevenLabsClient, stream } = require("elevenlabs");
|
||||||
|
|
||||||
|
/**
 * Text-to-speech provider backed by the ElevenLabs client.
 * Reads its API key from TTS_ELEVEN_LABS_KEY and its voice from
 * TTS_ELEVEN_LABS_VOICE_MODEL.
 */
class ElevenLabsTTS {
  /**
   * @throws {Error} When TTS_ELEVEN_LABS_KEY is not set.
   */
  constructor() {
    if (!process.env.TTS_ELEVEN_LABS_KEY)
      throw new Error("No ElevenLabs API key was set.");
    this.elevenLabs = new ElevenLabsClient({
      apiKey: process.env.TTS_ELEVEN_LABS_KEY,
    });

    // Rachel as default voice
    // https://api.elevenlabs.io/v1/voices
    this.voiceId =
      process.env.TTS_ELEVEN_LABS_VOICE_MODEL ?? "21m00Tcm4TlvDq8ikWAM";
    this.modelId = "eleven_multilingual_v2";
  }

  /**
   * List the voices available to an API key.
   * Best-effort: any failure is logged and an empty list is returned.
   * @param {string|null} apiKey - Key to use; falls back to TTS_ELEVEN_LABS_KEY.
   * @returns {Promise<object[]>} Voice records as returned by the client, or [].
   */
  static async voices(apiKey = null) {
    try {
      const client = new ElevenLabsClient({
        apiKey: apiKey ?? process.env.TTS_ELEVEN_LABS_KEY ?? null,
      });
      return (await client.voices.getAll())?.voices ?? [];
    } catch (e) {
      // Callers rely on the empty-list fallback, so report and carry on.
      console.error("ElevenLabs: could not list voices.", e?.message ?? e);
    }
    return [];
  }

  /**
   * Drain a readable stream into a single Buffer.
   * @param {import("stream").Readable} stream
   * @returns {Promise<Buffer>}
   */
  #stream2buffer(stream) {
    return new Promise((resolve, reject) => {
      const chunks = [];
      stream.on("data", (chunk) =>
        // Buffer.concat only accepts binary chunks; normalise anything else.
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
      );
      stream.on("end", () => resolve(Buffer.concat(chunks)));
      stream.on("error", (err) => reject(err));
    });
  }

  /**
   * Synthesize speech for the given text.
   * @param {string} textInput - Text to speak.
   * @returns {Promise<Buffer|null>} Audio bytes, or null when generation failed
   *   (the error is logged).
   */
  async ttsBuffer(textInput) {
    try {
      const audio = await this.elevenLabs.generate({
        voice: this.voiceId,
        text: textInput,
        // Use the instance's configured model rather than a second literal.
        model_id: this.modelId,
      });
      return await this.#stream2buffer(audio);
    } catch (e) {
      console.error(e);
    }
    return null;
  }
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
ElevenLabsTTS,
|
||||||
|
};
|
15
server/utils/TextToSpeech/index.js
Normal file
15
server/utils/TextToSpeech/index.js
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
/**
 * Build the server-side text-to-speech provider selected by TTS_PROVIDER.
 * An unset TTS_PROVIDER is treated as "openai". Provider modules are
 * required lazily so only the selected one is loaded.
 *
 * @returns {object} An OpenAiTTS or ElevenLabsTTS instance.
 * @throws {Error} When TTS_PROVIDER names anything else (including "native",
 *   which the settings UI describes as the browser's built-in TTS), or when
 *   the chosen provider's constructor rejects its configuration.
 */
function getTTSProvider() {
  const provider = process.env.TTS_PROVIDER || "openai";
  switch (provider) {
    // Each case gets its own block so the const bindings stay case-local.
    case "openai": {
      const { OpenAiTTS } = require("./openAi");
      return new OpenAiTTS();
    }
    case "elevenlabs": {
      const { ElevenLabsTTS } = require("./elevenLabs");
      return new ElevenLabsTTS();
    }
    default:
      // A value is always present here (it defaults to "openai"), so report
      // the unsupported value instead of claiming none was found.
      throw new Error(
        `ENV: TTS_PROVIDER "${provider}" has no server-side TTS implementation!`
      );
  }
}
|
||||||
|
|
||||||
|
module.exports = { getTTSProvider };
|
29
server/utils/TextToSpeech/openAi/index.js
Normal file
29
server/utils/TextToSpeech/openAi/index.js
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/**
 * Text-to-speech provider backed by the OpenAI client.
 * Configured through TTS_OPEN_AI_KEY and TTS_OPEN_AI_VOICE_MODEL.
 */
class OpenAiTTS {
  /**
   * @throws {Error} When TTS_OPEN_AI_KEY is not set.
   */
  constructor() {
    const apiKey = process.env.TTS_OPEN_AI_KEY;
    if (!apiKey) throw new Error("No OpenAI API key was set.");

    // Loaded only once a key is known to exist.
    const { OpenAI: OpenAIApi } = require("openai");
    this.openai = new OpenAIApi({ apiKey });
    // "alloy" is used when no voice has been configured.
    this.voice = process.env.TTS_OPEN_AI_VOICE_MODEL ?? "alloy";
  }

  /**
   * Synthesize speech for the given text with the "tts-1" model.
   * @param {string} textInput - Text to speak.
   * @returns {Promise<Buffer|null>} Audio bytes, or null when the request
   *   failed (the error is logged).
   */
  async ttsBuffer(textInput) {
    const speechRequest = {
      model: "tts-1",
      voice: this.voice,
      input: textInput,
    };

    try {
      const speech = await this.openai.audio.speech.create(speechRequest);
      const bytes = await speech.arrayBuffer();
      return Buffer.from(bytes);
    } catch (err) {
      console.error(err);
      return null;
    }
  }
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
OpenAiTTS,
|
||||||
|
};
|
@ -4,6 +4,7 @@ const {
|
|||||||
} = require("../AiProviders/openRouter");
|
} = require("../AiProviders/openRouter");
|
||||||
const { perplexityModels } = require("../AiProviders/perplexity");
|
const { perplexityModels } = require("../AiProviders/perplexity");
|
||||||
const { togetherAiModels } = require("../AiProviders/togetherAi");
|
const { togetherAiModels } = require("../AiProviders/togetherAi");
|
||||||
|
const { ElevenLabsTTS } = require("../TextToSpeech/elevenLabs");
|
||||||
const SUPPORT_CUSTOM_MODELS = [
|
const SUPPORT_CUSTOM_MODELS = [
|
||||||
"openai",
|
"openai",
|
||||||
"localai",
|
"localai",
|
||||||
@ -15,6 +16,7 @@ const SUPPORT_CUSTOM_MODELS = [
|
|||||||
"openrouter",
|
"openrouter",
|
||||||
"lmstudio",
|
"lmstudio",
|
||||||
"koboldcpp",
|
"koboldcpp",
|
||||||
|
"elevenlabs-tts",
|
||||||
];
|
];
|
||||||
|
|
||||||
async function getCustomModels(provider = "", apiKey = null, basePath = null) {
|
async function getCustomModels(provider = "", apiKey = null, basePath = null) {
|
||||||
@ -42,6 +44,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
|
|||||||
return await getLMStudioModels(basePath);
|
return await getLMStudioModels(basePath);
|
||||||
case "koboldcpp":
|
case "koboldcpp":
|
||||||
return await getKoboldCPPModels(basePath);
|
return await getKoboldCPPModels(basePath);
|
||||||
|
case "elevenlabs-tts":
|
||||||
|
return await getElevenLabsModels(apiKey);
|
||||||
default:
|
default:
|
||||||
return { models: [], error: "Invalid provider for custom models" };
|
return { models: [], error: "Invalid provider for custom models" };
|
||||||
}
|
}
|
||||||
@ -321,6 +325,32 @@ function nativeLLMModels() {
|
|||||||
return { models: files, error: null };
|
return { models: files, error: null };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * List ElevenLabs voices in the custom-model shape ({ id, organization, name }).
 *
 * When the lookup yields no voices, a single default "Rachel" entry is
 * returned instead. When voices were found and an API key was supplied, that
 * key is stored in process.env.TTS_ELEVEN_LABS_KEY.
 *
 * @param {string|null} apiKey - Key forwarded to ElevenLabsTTS.voices.
 * @returns {Promise<{models: object[], error: null}>}
 */
async function getElevenLabsModels(apiKey = null) {
  const voices = await ElevenLabsTTS.voices(apiKey);
  const models = voices.map(({ voice_id, category, name }) => ({
    id: voice_id,
    organization: category,
    name,
  }));

  if (models.length > 0) {
    // The key just produced a voice list, so keep it for later requests.
    if (apiKey) process.env.TTS_ELEVEN_LABS_KEY = apiKey;
    return { models, error: null };
  }

  // Nothing came back: offer only the default voice.
  const defaultVoice = {
    id: "21m00Tcm4TlvDq8ikWAM",
    organization: "premade",
    name: "Rachel (default)",
  };
  return { models: [defaultVoice], error: null };
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
getCustomModels,
|
getCustomModels,
|
||||||
};
|
};
|
||||||
|
@ -366,6 +366,32 @@ const KEY_MAPPING = {
|
|||||||
envKey: "AGENT_SERPER_DEV_KEY",
|
envKey: "AGENT_SERPER_DEV_KEY",
|
||||||
checks: [],
|
checks: [],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// TTS/STT Integration ENVS
|
||||||
|
TextToSpeechProvider: {
|
||||||
|
envKey: "TTS_PROVIDER",
|
||||||
|
checks: [supportedTTSProvider],
|
||||||
|
},
|
||||||
|
|
||||||
|
// TTS OpenAI
|
||||||
|
TTSOpenAIKey: {
|
||||||
|
envKey: "TTS_OPEN_AI_KEY",
|
||||||
|
checks: [validOpenAIKey],
|
||||||
|
},
|
||||||
|
TTSOpenAIVoiceModel: {
|
||||||
|
envKey: "TTS_OPEN_AI_VOICE_MODEL",
|
||||||
|
checks: [],
|
||||||
|
},
|
||||||
|
|
||||||
|
// TTS ElevenLabs
|
||||||
|
TTSElevenLabsKey: {
|
||||||
|
envKey: "TTS_ELEVEN_LABS_KEY",
|
||||||
|
checks: [isNotEmpty],
|
||||||
|
},
|
||||||
|
TTSElevenLabsVoiceModel: {
|
||||||
|
envKey: "TTS_ELEVEN_LABS_VOICE_MODEL",
|
||||||
|
checks: [],
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
function isNotEmpty(input = "") {
|
function isNotEmpty(input = "") {
|
||||||
@ -419,6 +445,11 @@ function validOllamaLLMBasePath(input = "") {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Validation check for the TTS provider setting.
 * @param {string} input - Candidate provider identifier.
 * @returns {string|null} null when the provider is accepted, otherwise a
 *   message naming the rejected value.
 */
function supportedTTSProvider(input = "") {
  const knownProviders = ["native", "openai", "elevenlabs"];
  if (knownProviders.some((candidate) => candidate === input)) return null;
  return `${input} is not a valid TTS provider.`;
}
|
||||||
|
|
||||||
function supportedLLM(input = "") {
|
function supportedLLM(input = "") {
|
||||||
const validSelection = [
|
const validSelection = [
|
||||||
"openai",
|
"openai",
|
||||||
|
@ -1901,6 +1901,11 @@ combined-stream@^1.0.8:
|
|||||||
dependencies:
|
dependencies:
|
||||||
delayed-stream "~1.0.0"
|
delayed-stream "~1.0.0"
|
||||||
|
|
||||||
|
command-exists@^1.2.9:
|
||||||
|
version "1.2.9"
|
||||||
|
resolved "https://registry.yarnpkg.com/command-exists/-/command-exists-1.2.9.tgz#c50725af3808c8ab0260fd60b01fbfa25b954f69"
|
||||||
|
integrity sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w==
|
||||||
|
|
||||||
command-line-args@5.2.1, command-line-args@^5.2.1:
|
command-line-args@5.2.1, command-line-args@^5.2.1:
|
||||||
version "5.2.1"
|
version "5.2.1"
|
||||||
resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.2.1.tgz#c44c32e437a57d7c51157696893c5909e9cec42e"
|
resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.2.1.tgz#c44c32e437a57d7c51157696893c5909e9cec42e"
|
||||||
@ -2255,6 +2260,18 @@ ee-first@1.1.1:
|
|||||||
resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
|
resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
|
||||||
integrity sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==
|
integrity sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==
|
||||||
|
|
||||||
|
elevenlabs@^0.5.0:
|
||||||
|
version "0.5.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/elevenlabs/-/elevenlabs-0.5.0.tgz#07eb1a943b0ab99b925875bd5c57833a3a024e58"
|
||||||
|
integrity sha512-jfex4ecuWIlyAUuMrMJAJNa5MLziqYQOCDw4ZYuoc9PCYLxtHwaYBWpZoDhnYMcceLI7rRRvmbLMcT9HlVMfHA==
|
||||||
|
dependencies:
|
||||||
|
command-exists "^1.2.9"
|
||||||
|
execa "^5.1.1"
|
||||||
|
form-data "4.0.0"
|
||||||
|
node-fetch "2.7.0"
|
||||||
|
qs "6.11.2"
|
||||||
|
url-join "4.0.1"
|
||||||
|
|
||||||
emoji-regex@^10.2.1:
|
emoji-regex@^10.2.1:
|
||||||
version "10.3.0"
|
version "10.3.0"
|
||||||
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-10.3.0.tgz#76998b9268409eb3dae3de989254d456e70cfe23"
|
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-10.3.0.tgz#76998b9268409eb3dae3de989254d456e70cfe23"
|
||||||
@ -2605,6 +2622,21 @@ eventemitter3@^4.0.4:
|
|||||||
resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f"
|
resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f"
|
||||||
integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
|
integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
|
||||||
|
|
||||||
|
execa@^5.1.1:
|
||||||
|
version "5.1.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.1.tgz#f80ad9cbf4298f7bd1d4c9555c21e93741c411dd"
|
||||||
|
integrity sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==
|
||||||
|
dependencies:
|
||||||
|
cross-spawn "^7.0.3"
|
||||||
|
get-stream "^6.0.0"
|
||||||
|
human-signals "^2.1.0"
|
||||||
|
is-stream "^2.0.0"
|
||||||
|
merge-stream "^2.0.0"
|
||||||
|
npm-run-path "^4.0.1"
|
||||||
|
onetime "^5.1.2"
|
||||||
|
signal-exit "^3.0.3"
|
||||||
|
strip-final-newline "^2.0.0"
|
||||||
|
|
||||||
expand-template@^2.0.3:
|
expand-template@^2.0.3:
|
||||||
version "2.0.3"
|
version "2.0.3"
|
||||||
resolved "https://registry.yarnpkg.com/expand-template/-/expand-template-2.0.3.tgz#6e14b3fcee0f3a6340ecb57d2e8918692052a47c"
|
resolved "https://registry.yarnpkg.com/expand-template/-/expand-template-2.0.3.tgz#6e14b3fcee0f3a6340ecb57d2e8918692052a47c"
|
||||||
@ -3024,6 +3056,11 @@ get-stream@^5.1.0:
|
|||||||
dependencies:
|
dependencies:
|
||||||
pump "^3.0.0"
|
pump "^3.0.0"
|
||||||
|
|
||||||
|
get-stream@^6.0.0:
|
||||||
|
version "6.0.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7"
|
||||||
|
integrity sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==
|
||||||
|
|
||||||
get-symbol-description@^1.0.2:
|
get-symbol-description@^1.0.2:
|
||||||
version "1.0.2"
|
version "1.0.2"
|
||||||
resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.2.tgz#533744d5aa20aca4e079c8e5daf7fd44202821f5"
|
resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.2.tgz#533744d5aa20aca4e079c8e5daf7fd44202821f5"
|
||||||
@ -3297,6 +3334,11 @@ https-proxy-agent@^7.0.0:
|
|||||||
agent-base "^7.0.2"
|
agent-base "^7.0.2"
|
||||||
debug "4"
|
debug "4"
|
||||||
|
|
||||||
|
human-signals@^2.1.0:
|
||||||
|
version "2.1.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0"
|
||||||
|
integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==
|
||||||
|
|
||||||
humanize-ms@^1.2.1:
|
humanize-ms@^1.2.1:
|
||||||
version "1.2.1"
|
version "1.2.1"
|
||||||
resolved "https://registry.yarnpkg.com/humanize-ms/-/humanize-ms-1.2.1.tgz#c46e3159a293f6b896da29316d8b6fe8bb79bbed"
|
resolved "https://registry.yarnpkg.com/humanize-ms/-/humanize-ms-1.2.1.tgz#c46e3159a293f6b896da29316d8b6fe8bb79bbed"
|
||||||
@ -4092,6 +4134,11 @@ merge-descriptors@1.0.1:
|
|||||||
resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61"
|
resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61"
|
||||||
integrity sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==
|
integrity sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==
|
||||||
|
|
||||||
|
merge-stream@^2.0.0:
|
||||||
|
version "2.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60"
|
||||||
|
integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
|
||||||
|
|
||||||
methods@~1.1.2:
|
methods@~1.1.2:
|
||||||
version "1.1.2"
|
version "1.1.2"
|
||||||
resolved "https://registry.yarnpkg.com/methods/-/methods-1.1.2.tgz#5529a4d67654134edcc5266656835b0f851afcee"
|
resolved "https://registry.yarnpkg.com/methods/-/methods-1.1.2.tgz#5529a4d67654134edcc5266656835b0f851afcee"
|
||||||
@ -4455,6 +4502,13 @@ normalize-path@^3.0.0, normalize-path@~3.0.0:
|
|||||||
resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
|
resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
|
||||||
integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
|
integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
|
||||||
|
|
||||||
|
npm-run-path@^4.0.1:
|
||||||
|
version "4.0.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea"
|
||||||
|
integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==
|
||||||
|
dependencies:
|
||||||
|
path-key "^3.0.0"
|
||||||
|
|
||||||
npmlog@^5.0.1:
|
npmlog@^5.0.1:
|
||||||
version "5.0.1"
|
version "5.0.1"
|
||||||
resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-5.0.1.tgz#f06678e80e29419ad67ab964e0fa69959c1eb8b0"
|
resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-5.0.1.tgz#f06678e80e29419ad67ab964e0fa69959c1eb8b0"
|
||||||
@ -4593,7 +4647,7 @@ one-time@^1.0.0:
|
|||||||
dependencies:
|
dependencies:
|
||||||
fn.name "1.x.x"
|
fn.name "1.x.x"
|
||||||
|
|
||||||
onetime@^5.1.0:
|
onetime@^5.1.0, onetime@^5.1.2:
|
||||||
version "5.1.2"
|
version "5.1.2"
|
||||||
resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e"
|
resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e"
|
||||||
integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==
|
integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==
|
||||||
@ -4774,7 +4828,7 @@ path-is-absolute@^1.0.0:
|
|||||||
resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
|
resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
|
||||||
integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==
|
integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==
|
||||||
|
|
||||||
path-key@^3.1.0:
|
path-key@^3.0.0, path-key@^3.1.0:
|
||||||
version "3.1.1"
|
version "3.1.1"
|
||||||
resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375"
|
resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375"
|
||||||
integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
|
integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
|
||||||
@ -5322,7 +5376,7 @@ side-channel@^1.0.4, side-channel@^1.0.6:
|
|||||||
get-intrinsic "^1.2.4"
|
get-intrinsic "^1.2.4"
|
||||||
object-inspect "^1.13.1"
|
object-inspect "^1.13.1"
|
||||||
|
|
||||||
signal-exit@^3.0.0, signal-exit@^3.0.2, signal-exit@^3.0.7:
|
signal-exit@^3.0.0, signal-exit@^3.0.2, signal-exit@^3.0.3, signal-exit@^3.0.7:
|
||||||
version "3.0.7"
|
version "3.0.7"
|
||||||
resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9"
|
resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9"
|
||||||
integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==
|
integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==
|
||||||
@ -5559,6 +5613,11 @@ strip-ansi@^7.0.1, strip-ansi@^7.1.0:
|
|||||||
dependencies:
|
dependencies:
|
||||||
ansi-regex "^6.0.1"
|
ansi-regex "^6.0.1"
|
||||||
|
|
||||||
|
strip-final-newline@^2.0.0:
|
||||||
|
version "2.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
|
||||||
|
integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==
|
||||||
|
|
||||||
strip-json-comments@^3.1.1:
|
strip-json-comments@^3.1.1:
|
||||||
version "3.1.1"
|
version "3.1.1"
|
||||||
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
|
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
|
||||||
|
Loading…
Reference in New Issue
Block a user