From b6be43be95049209dd362ea3adc94f7cf7935128 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Tue, 14 May 2024 11:57:21 -0700 Subject: [PATCH] Add Speech-to-text and Text-to-speech providers (#1394) * Add Speech-to-text and Text-to-speech providers * add files and update comment * update comments * patch: bad playerRef check --- .vscode/settings.json | 1 + docker/.env.example | 13 ++ frontend/package.json | 1 + frontend/src/App.jsx | 7 + .../src/components/SettingsSidebar/index.jsx | 9 + .../SpeechToText/BrowserNative/index.jsx | 9 + .../TextToSpeech/BrowserNative/index.jsx | 9 + .../TextToSpeech/ElevenLabsOptions/index.jsx | 107 +++++++++ .../TextToSpeech/OpenAiOptions/index.jsx | 45 ++++ .../Actions/TTSButton/asyncTts.jsx | 94 ++++++++ .../Actions/TTSButton/index.jsx | 23 ++ .../Actions/TTSButton/native.jsx | 61 +++++ .../HistoricalMessage/Actions/index.jsx | 65 +----- .../PromptInput/SpeechToText/index.jsx | 82 +++++++ .../ChatContainer/PromptInput/index.jsx | 5 + .../src/media/ttsproviders/elevenlabs.png | Bin 0 -> 6422 bytes frontend/src/models/system.js | 2 +- frontend/src/models/workspace.js | 17 +- .../GeneralSettings/AudioPreference/index.jsx | 45 ++++ .../GeneralSettings/AudioPreference/stt.jsx | 191 ++++++++++++++++ .../GeneralSettings/AudioPreference/tts.jsx | 209 ++++++++++++++++++ frontend/src/utils/paths.js | 3 + frontend/yarn.lock | 5 + server/.env.example | 13 ++ server/endpoints/workspaces.js | 50 ++++- server/models/systemSettings.js | 11 + server/package.json | 1 + server/utils/TextToSpeech/elevenLabs/index.js | 54 +++++ server/utils/TextToSpeech/index.js | 15 ++ server/utils/TextToSpeech/openAi/index.js | 29 +++ server/utils/helpers/customModels.js | 30 +++ server/utils/helpers/updateENV.js | 31 +++ server/yarn.lock | 65 +++++- 33 files changed, 1234 insertions(+), 68 deletions(-) create mode 100644 frontend/src/components/SpeechToText/BrowserNative/index.jsx create mode 100644 frontend/src/components/TextToSpeech/BrowserNative/index.jsx create mode 100644 frontend/src/components/TextToSpeech/ElevenLabsOptions/index.jsx create mode 100644 frontend/src/components/TextToSpeech/OpenAiOptions/index.jsx create mode 100644 frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/asyncTts.jsx create mode 100644 frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/index.jsx create mode 100644 frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/native.jsx create mode 100644 frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx create mode 100644 frontend/src/media/ttsproviders/elevenlabs.png create mode 100644 frontend/src/pages/GeneralSettings/AudioPreference/index.jsx create mode 100644 frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx create mode 100644 frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx create mode 100644 server/utils/TextToSpeech/elevenLabs/index.js create mode 100644 server/utils/TextToSpeech/index.js create mode 100644 server/utils/TextToSpeech/openAi/index.js diff --git a/.vscode/settings.json b/.vscode/settings.json index 110c4fa6..4930aa2d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "cooldowns", "Deduplicator", "Dockerized", + "elevenlabs", "Embeddable", "epub", "GROQ", diff --git a/docker/.env.example b/docker/.env.example index 8cfa2aea..70059ea5 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -171,6 +171,19 @@ GID='1000' # WHISPER_PROVIDER="openai" # OPEN_AI_KEY=sk-xxxxxxxx +########################################### +######## TTS/STT Model Selection ########## +########################################### +# TTS_PROVIDER="native" + +# TTS_PROVIDER="openai" +# TTS_OPEN_AI_KEY=sk-example +# TTS_OPEN_AI_VOICE_MODEL=nova + +# TTS_PROVIDER="elevenlabs" +# TTS_ELEVEN_LABS_KEY= +# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. # DISABLE_TELEMETRY="false" diff --git a/frontend/package.json b/frontend/package.json index ded06aa9..11e612fc 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -28,6 +28,7 @@ "react-dropzone": "^14.2.3", "react-loading-skeleton": "^3.1.0", "react-router-dom": "^6.3.0", + "react-speech-recognition": "^3.10.0", "react-tag-input-component": "^2.0.2", "react-toastify": "^9.1.3", "react-tooltip": "^5.25.2", diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 0a5ed65f..b29e6eea 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -32,6 +32,9 @@ const GeneralLLMPreference = lazy( const GeneralTranscriptionPreference = lazy( () => import("@/pages/GeneralSettings/TranscriptionPreference") ); +const GeneralAudioPreference = lazy( + () => import("@/pages/GeneralSettings/AudioPreference") +); const GeneralEmbeddingPreference = lazy( () => import("@/pages/GeneralSettings/EmbeddingPreference") ); @@ -85,6 +88,10 @@ export default function App() { } /> + } + /> } diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 67797d26..6b8f79e5 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -21,6 +21,7 @@ import { ClosedCaptioning, EyeSlash, SplitVertical, + Microphone, } from "@phosphor-icons/react"; import useUser from "@/hooks/useUser"; import { USER_BACKGROUND_COLOR } from "@/utils/constants"; @@ -280,6 +281,14 @@ const SidebarOptions = ({ user = null }) => ( flex={true} allowedRole={["admin"]} /> +