diff --git a/collector/index.js b/collector/index.js
index 9ebe5f1ce..a1142d756 100644
--- a/collector/index.js
+++ b/collector/index.js
@@ -25,7 +25,7 @@ app.use(
);
app.post("/process", async function (request, response) {
- const { filename } = reqBody(request);
+ const { filename, options = {} } = reqBody(request);
try {
const targetFilename = path
.normalize(filename)
@@ -34,7 +34,7 @@ app.post("/process", async function (request, response) {
success,
reason,
documents = [],
- } = await processSingleFile(targetFilename);
+ } = await processSingleFile(targetFilename, options);
response
.status(200)
.json({ filename: targetFilename, success, reason, documents });
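With this change the `/process` endpoint accepts an optional `options` object alongside `filename`. A minimal sketch of the new request shape — the port (conventionally 8888 for AnythingLLM's collector), the filename, and the key are placeholders, and any auth middleware the collector may apply is omitted:

```js
(async () => {
  const res = await fetch("http://localhost:8888/process", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      filename: "meeting-recording.mp3", // placeholder file in the watch directory
      options: {
        whisperProvider: "openai", // unknown or missing values fall back to "local"
        openAiKey: process.env.OPEN_AI_KEY,
      },
    }),
  });
  const { success, reason, documents } = await res.json();
  console.log({ success, reason, count: documents.length });
})();
```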
diff --git a/collector/package.json b/collector/package.json
index d145ab865..8a0441d78 100644
--- a/collector/package.json
+++ b/collector/package.json
@@ -33,6 +33,7 @@
"moment": "^2.29.4",
"multer": "^1.4.5-lts.1",
"officeparser": "^4.0.5",
+ "openai": "^3.2.1",
"pdf-parse": "^1.1.1",
"puppeteer": "~21.5.2",
"slugify": "^1.6.6",
@@ -46,4 +47,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
-}
+}
\ No newline at end of file
diff --git a/collector/processSingleFile/convert/asAudio.js b/collector/processSingleFile/convert/asAudio.js
index 15ae5cf00..170426e40 100644
--- a/collector/processSingleFile/convert/asAudio.js
+++ b/collector/processSingleFile/convert/asAudio.js
@@ -1,5 +1,3 @@
-const fs = require("fs");
-const path = require("path");
const { v4 } = require("uuid");
const {
createdDate,
@@ -9,39 +7,35 @@ const {
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");
+const { OpenAiWhisper } = require("../../utils/WhisperProviders/OpenAiWhisper");
-async function asAudio({ fullFilePath = "", filename = "" }) {
- const whisper = new LocalWhisper();
+const WHISPER_PROVIDERS = {
+ openai: OpenAiWhisper,
+ local: LocalWhisper,
+};
+
+async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
+ const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
+ options?.whisperProvider
+ )
+ ? WHISPER_PROVIDERS[options?.whisperProvider]
+ : WHISPER_PROVIDERS.local;
console.log(`-- Working ${filename} --`);
- const transcriberPromise = new Promise((resolve) =>
- whisper.client().then((client) => resolve(client))
- );
- const audioDataPromise = new Promise((resolve) =>
- convertToWavAudioData(fullFilePath).then((audioData) => resolve(audioData))
- );
- const [audioData, transcriber] = await Promise.all([
- audioDataPromise,
- transcriberPromise,
- ]);
+ const whisper = new WhisperProvider({ options });
+ const { content, error } = await whisper.processFile(fullFilePath, filename);
- if (!audioData) {
- console.error(`Failed to parse content from ${filename}.`);
+ if (!!error) {
+ console.error(`Error encountered while parsing ${filename}.`);
trashFile(fullFilePath);
return {
success: false,
- reason: `Failed to parse content from ${filename}.`,
+ reason: error,
documents: [],
};
}
- console.log(`[Model Working]: Transcribing audio data to text`);
- const { text: content } = await transcriber(audioData, {
- chunk_length_s: 30,
- stride_length_s: 5,
- });
-
- if (!content.length) {
+ if (!content?.length) {
console.error(`Resulting text content was empty for ${filename}.`);
trashFile(fullFilePath);
return {
@@ -76,79 +70,4 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
return { success: true, reason: null, documents: [document] };
}
-async function convertToWavAudioData(sourcePath) {
- try {
- let buffer;
- const wavefile = require("wavefile");
- const ffmpeg = require("fluent-ffmpeg");
- const outFolder = path.resolve(__dirname, `../../storage/tmp`);
- if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });
-
- const fileExtension = path.extname(sourcePath).toLowerCase();
- if (fileExtension !== ".wav") {
- console.log(
- `[Conversion Required] ${fileExtension} file detected - converting to .wav`
- );
- const outputFile = path.resolve(outFolder, `${v4()}.wav`);
- const convert = new Promise((resolve) => {
- ffmpeg(sourcePath)
- .toFormat("wav")
- .on("error", (error) => {
- console.error(`[Conversion Error] ${error.message}`);
- resolve(false);
- })
- .on("progress", (progress) =>
- console.log(
- `[Conversion Processing]: ${progress.targetSize}KB converted`
- )
- )
- .on("end", () => {
- console.log("[Conversion Complete]: File converted to .wav!");
- resolve(true);
- })
- .save(outputFile);
- });
- const success = await convert;
- if (!success)
- throw new Error(
- "[Conversion Failed]: Could not convert file to .wav format!"
- );
-
- const chunks = [];
- const stream = fs.createReadStream(outputFile);
- for await (let chunk of stream) chunks.push(chunk);
- buffer = Buffer.concat(chunks);
- fs.rmSync(outputFile);
- } else {
- const chunks = [];
- const stream = fs.createReadStream(sourcePath);
- for await (let chunk of stream) chunks.push(chunk);
- buffer = Buffer.concat(chunks);
- }
-
- const wavFile = new wavefile.WaveFile(buffer);
- wavFile.toBitDepth("32f");
- wavFile.toSampleRate(16000);
-
- let audioData = wavFile.getSamples();
- if (Array.isArray(audioData)) {
- if (audioData.length > 1) {
- const SCALING_FACTOR = Math.sqrt(2);
-
- // Merge channels into first channel to save memory
- for (let i = 0; i < audioData[0].length; ++i) {
- audioData[0][i] =
- (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
- }
- }
- audioData = audioData[0];
- }
-
- return audioData;
- } catch (error) {
- console.error(`convertToWavAudioData`, error);
- return null;
- }
-}
-
module.exports = asAudio;
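Since the provider lookup is a plain map with a local fallback, `asAudio` can also be exercised directly. A sketch under assumed inputs (the file path and key are placeholders, not values from this diff):

```js
const asAudio = require("./collector/processSingleFile/convert/asAudio");

(async () => {
  // Unknown or missing whisperProvider values resolve to LocalWhisper.
  const { success, reason, documents } = await asAudio({
    fullFilePath: "/app/collector/hotdir/podcast.mp3", // placeholder path
    filename: "podcast.mp3",
    options: { whisperProvider: "openai", openAiKey: process.env.OPEN_AI_KEY },
  });
  if (!success) console.error(reason);
  else console.log(`Parsed ${documents.length} document(s).`);
})();
```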
diff --git a/collector/processSingleFile/index.js b/collector/processSingleFile/index.js
index 569a2cde2..5d9e6a38a 100644
--- a/collector/processSingleFile/index.js
+++ b/collector/processSingleFile/index.js
@@ -7,7 +7,7 @@ const {
const { trashFile, isTextType } = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"];
-async function processSingleFile(targetFilename) {
+async function processSingleFile(targetFilename, options = {}) {
const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
if (RESERVED_FILES.includes(targetFilename))
return {
@@ -54,6 +54,7 @@ async function processSingleFile(targetFilename) {
return await FileTypeProcessor({
fullFilePath,
filename: targetFilename,
+ options,
});
}
diff --git a/collector/utils/WhisperProviders/OpenAiWhisper.js b/collector/utils/WhisperProviders/OpenAiWhisper.js
new file mode 100644
index 000000000..3b9d08e6a
--- /dev/null
+++ b/collector/utils/WhisperProviders/OpenAiWhisper.js
@@ -0,0 +1,44 @@
+const fs = require("fs");
+
+class OpenAiWhisper {
+ constructor({ options }) {
+ const { Configuration, OpenAIApi } = require("openai");
+ if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
+
+ const config = new Configuration({
+ apiKey: options.openAiKey,
+ });
+ this.openai = new OpenAIApi(config);
+ this.model = "whisper-1";
+ this.temperature = 0;
+ this.#log("Initialized.");
+ }
+
+ #log(text, ...args) {
+ console.log(`\x1b[32m[OpenAiWhisper]\x1b[0m ${text}`, ...args);
+ }
+
+ async processFile(fullFilePath) {
+ return await this.openai
+ .createTranscription(
+ fs.createReadStream(fullFilePath),
+ this.model,
+ undefined,
+ "text",
+ this.temperature
+ )
+ .then((res) => {
+ if (res.hasOwnProperty("data"))
+ return { content: res.data, error: null };
+ return { content: "", error: "No content was able to be transcribed." };
+ })
+ .catch((e) => {
+ this.#log(`Could not get any response from openai whisper`, e.message);
+ return { content: "", error: e.message };
+ });
+ }
+}
+
+module.exports = {
+ OpenAiWhisper,
+};
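Note that this class pulls in the v3 `openai` SDK (`Configuration`/`OpenAIApi`, axios-based) added to `package.json` above, distinct from the `openai@^4.19.0` entry already in the lockfile. A sketch of direct usage, with a placeholder key and path:

```js
const { OpenAiWhisper } = require("./collector/utils/WhisperProviders/OpenAiWhisper");

(async () => {
  // The constructor throws synchronously if options.openAiKey is missing.
  const whisper = new OpenAiWhisper({
    options: { openAiKey: process.env.OPEN_AI_KEY },
  });
  // With the "text" response format, res.data is the raw transcript string.
  const { content, error } = await whisper.processFile("/tmp/sample.wav");
  if (error) console.error(error);
  else console.log(content);
})();
```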
diff --git a/collector/utils/WhisperProviders/localWhisper.js b/collector/utils/WhisperProviders/localWhisper.js
index 6503e2021..46dbe226b 100644
--- a/collector/utils/WhisperProviders/localWhisper.js
+++ b/collector/utils/WhisperProviders/localWhisper.js
@@ -1,5 +1,6 @@
-const path = require("path");
const fs = require("fs");
+const path = require("path");
+const { v4 } = require("uuid");
class LocalWhisper {
constructor() {
@@ -16,12 +17,94 @@ class LocalWhisper {
// Make directory when it does not exist in existing installations
if (!fs.existsSync(this.cacheDir))
fs.mkdirSync(this.cacheDir, { recursive: true });
+
+ this.#log("Initialized.");
+ }
+
+ #log(text, ...args) {
+ console.log(`\x1b[32m[LocalWhisper]\x1b[0m ${text}`, ...args);
+ }
+
+ async #convertToWavAudioData(sourcePath) {
+ try {
+ let buffer;
+ const wavefile = require("wavefile");
+ const ffmpeg = require("fluent-ffmpeg");
+ const outFolder = path.resolve(__dirname, `../../storage/tmp`);
+ if (!fs.existsSync(outFolder))
+ fs.mkdirSync(outFolder, { recursive: true });
+
+ const fileExtension = path.extname(sourcePath).toLowerCase();
+ if (fileExtension !== ".wav") {
+ this.#log(
+ `File conversion required! ${fileExtension} file detected - converting to .wav`
+ );
+ const outputFile = path.resolve(outFolder, `${v4()}.wav`);
+ const convert = new Promise((resolve) => {
+ ffmpeg(sourcePath)
+ .toFormat("wav")
+ .on("error", (error) => {
+ this.#log(`Conversion Error! ${error.message}`);
+ resolve(false);
+ })
+ .on("progress", (progress) =>
+ this.#log(
+ `Conversion Processing! ${progress.targetSize}KB converted`
+ )
+ )
+ .on("end", () => {
+ this.#log(`Conversion Complete! File converted to .wav!`);
+ resolve(true);
+ })
+ .save(outputFile);
+ });
+ const success = await convert;
+ if (!success)
+ throw new Error(
+ "[Conversion Failed]: Could not convert file to .wav format!"
+ );
+
+ const chunks = [];
+ const stream = fs.createReadStream(outputFile);
+ for await (let chunk of stream) chunks.push(chunk);
+ buffer = Buffer.concat(chunks);
+ fs.rmSync(outputFile);
+ } else {
+ const chunks = [];
+ const stream = fs.createReadStream(sourcePath);
+ for await (let chunk of stream) chunks.push(chunk);
+ buffer = Buffer.concat(chunks);
+ }
+
+ const wavFile = new wavefile.WaveFile(buffer);
+ wavFile.toBitDepth("32f");
+ wavFile.toSampleRate(16000);
+
+ let audioData = wavFile.getSamples();
+ if (Array.isArray(audioData)) {
+ if (audioData.length > 1) {
+ const SCALING_FACTOR = Math.sqrt(2);
+
+ // Merge channels into first channel to save memory
+ for (let i = 0; i < audioData[0].length; ++i) {
+ audioData[0][i] =
+ (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
+ }
+ }
+ audioData = audioData[0];
+ }
+
+ return audioData;
+ } catch (error) {
+ console.error(`convertToWavAudioData`, error);
+ return null;
+ }
}
async client() {
if (!fs.existsSync(this.modelPath)) {
- console.log(
- "\x1b[34m[INFO]\x1b[0m The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)\n\n"
+ this.#log(
+ `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)`
);
}
@@ -48,10 +131,45 @@ class LocalWhisper {
: {}),
});
} catch (error) {
- console.error("Failed to load the native whisper model:", error);
+ this.#log("Failed to load the native whisper model:", error);
throw error;
}
}
+
+ async processFile(fullFilePath, filename) {
+ try {
+ const transcriberPromise = new Promise((resolve) =>
+ this.client().then((client) => resolve(client))
+ );
+ const audioDataPromise = new Promise((resolve) =>
+ this.#convertToWavAudioData(fullFilePath).then((audioData) =>
+ resolve(audioData)
+ )
+ );
+ const [audioData, transcriber] = await Promise.all([
+ audioDataPromise,
+ transcriberPromise,
+ ]);
+
+ if (!audioData) {
+ this.#log(`Failed to parse content from ${filename}.`);
+ return {
+ content: null,
+ error: `Failed to parse content from ${filename}.`,
+ };
+ }
+
+ this.#log(`Transcribing audio data to text...`);
+ const { text } = await transcriber(audioData, {
+ chunk_length_s: 30,
+ stride_length_s: 5,
+ });
+
+ return { content: text, error: null };
+ } catch (error) {
+ return { content: null, error: error.message };
+ }
+ }
}
module.exports = {
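One design note on `processFile` above: wrapping `this.client()` and `this.#convertToWavAudioData()` in `new Promise((resolve) => ...)` is redundant, since both already return promises. Because the wrappers never call `reject`, a rejection inside `client()` would also leave the outer promise pending instead of reaching the `catch`. A leaner equivalent of that section:

```js
// Model load and audio conversion still run concurrently, and
// rejections now propagate to the surrounding try/catch.
const [audioData, transcriber] = await Promise.all([
  this.#convertToWavAudioData(fullFilePath),
  this.client(),
]);
```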
diff --git a/collector/yarn.lock b/collector/yarn.lock
index bf979c86c..3bb0f1ea7 100644
--- a/collector/yarn.lock
+++ b/collector/yarn.lock
@@ -372,6 +372,13 @@ asynckit@^0.4.0:
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
+axios@^0.26.0:
+ version "0.26.1"
+ resolved "https://registry.yarnpkg.com/axios/-/axios-0.26.1.tgz#1ede41c51fcf51bbbd6fd43669caaa4f0495aaa9"
+ integrity sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==
+ dependencies:
+ follow-redirects "^1.14.8"
+
b4a@^1.6.4:
version "1.6.4"
resolved "https://registry.yarnpkg.com/b4a/-/b4a-1.6.4.tgz#ef1c1422cae5ce6535ec191baeed7567443f36c9"
@@ -1203,6 +1210,11 @@ fluent-ffmpeg@^2.1.2:
async ">=0.2.9"
which "^1.1.1"
+follow-redirects@^1.14.8:
+ version "1.15.6"
+ resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b"
+ integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==
+
form-data-encoder@1.7.2:
version "1.7.2"
resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040"
@@ -2304,6 +2316,14 @@ onnxruntime-web@1.14.0:
onnxruntime-common "~1.14.0"
platform "^1.3.6"
+openai@^3.2.1:
+ version "3.3.0"
+ resolved "https://registry.yarnpkg.com/openai/-/openai-3.3.0.tgz#a6408016ad0945738e1febf43f2fccca83a3f532"
+ integrity sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==
+ dependencies:
+ axios "^0.26.0"
+ form-data "^4.0.0"
+
openai@^4.19.0:
version "4.20.1"
resolved "https://registry.yarnpkg.com/openai/-/openai-4.20.1.tgz#afa0d496d125b5a0f6cebcb4b9aeabf71e00214e"
diff --git a/docker/.env.example b/docker/.env.example
index ae4913dc4..ed6fd3bce 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -131,6 +131,16 @@ GID='1000'
# ASTRA_DB_APPLICATION_TOKEN=
# ASTRA_DB_ENDPOINT=
+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+# WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# DISABLE_TELEMETRY="false"
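The server-side plumbing that reads these variables and forwards them to the collector is not part of this diff; a hypothetical sketch of how they could map onto the `options` payload that `/process` now accepts:

```js
// Hypothetical mapping (the env var names match this diff's .env.example;
// the forwarding code itself is an assumption, not shown in this patch).
const whisperOptions =
  process.env.WHISPER_PROVIDER === "openai"
    ? { whisperProvider: "openai", openAiKey: process.env.OPEN_AI_KEY }
    : { whisperProvider: "local" };
```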
diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
index 86f6eb08a..9ef160e72 100644
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -29,6 +29,9 @@ const GeneralApiKeys = lazy(() => import("@/pages/GeneralSettings/ApiKeys"));
const GeneralLLMPreference = lazy(
() => import("@/pages/GeneralSettings/LLMPreference")
);
+const GeneralTranscriptionPreference = lazy(
+ () => import("@/pages/GeneralSettings/TranscriptionPreference")
+);
const GeneralEmbeddingPreference = lazy(
() => import("@/pages/GeneralSettings/EmbeddingPreference")
);
@@ -47,6 +50,9 @@ const EmbedConfigSetup = lazy(
() => import("@/pages/GeneralSettings/EmbedConfigs")
);
const EmbedChats = lazy(() => import("@/pages/GeneralSettings/EmbedChats"));
+const PrivacyAndData = lazy(
+ () => import("@/pages/GeneralSettings/PrivacyAndData")
+);
export default function App() {
return (
@@ -76,6 +82,12 @@ export default function App() {
path="/settings/llm-preference"
element={
Name
Date
Kind
@@ -148,7 +148,7 @@ const PinAlert = memo(() => {
+          Using the local whisper model on machines with limited RAM or CPU
+          can stall AnythingLLM when processing media files.
+
+          We recommend at least 2GB of RAM and upload files <10Mb.
+
+          The built-in model will automatically download on the first use.
Welcome to your new workspace.
-          To get started either{" "}
-          upload a document
-          or send a chat.
+          {!user || user.role !== "default" ? (
+            To get started either{" "}
+            upload a document
+            or send a chat.
+          ) : (
+            To get started send a chat.
+          )}
+          Privacy & Data-Handling
+          This is your configuration for how connected third party providers
+          and AnythingLLM handle your data.
+          {LLM_SELECTION_PRIVACY[llmChoice].name}
+          {EMBEDDING_ENGINE_PRIVACY[embeddingEngine].name}
+          {VECTOR_DB_PRIVACY[vectorDb].name}
+          All events do not record IP-address and contain{" "}
+          no identifying content, settings, chats, or other non-usage
+          based information. To see the list of event tags collected you can
+          look on{" "}
+          Github here.
+          As an open-source project we respect your right to privacy. We are
+          dedicated to building the best solution for integrating AI and
+          documents privately and securely. If you do decide to turn off
+          telemetry all we ask is to consider sending us feedback and thoughts
+          so that we can continue to improve AnythingLLM for you.{" "}
+          team@mintplexlabs.com.