const fs = require("fs");
const path = require("path");
const { v4 } = require("uuid");
const {
  createdDate,
  trashFile,
  writeToServerDocuments,
} = require("../../utils/files");
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");

/**
 * Transcribes an uploaded audio file with the local Whisper provider and
 * stores the transcript as a server document.
 *
 * The source file is passed to `trashFile` on every path that returns a
 * result object (success or failure).
 *
 * @param {Object} params
 * @param {string} [params.fullFilePath=""] - Path of the uploaded audio file.
 * @param {string} [params.filename=""] - Name used in logs and as the document title.
 * @returns {Promise<{success: boolean, reason: (string|null), documents: Array}>}
 *   `documents` holds the single written document on success, otherwise it is empty.
 */
async function asAudio({ fullFilePath = "", filename = "" }) {
  const whisper = new LocalWhisper();
  console.log(`-- Working ${filename} --`);

  // Decode the audio and load the model concurrently. Both helpers resolve
  // with null on failure instead of rejecting, so neither can leave this
  // await pending and the other task is never abandoned mid-flight.
  const [audioData, transcriber] = await Promise.all([
    convertToWavAudioData(fullFilePath),
    loadTranscriber(whisper),
  ]);

  if (!audioData) {
    console.error(`Failed to parse content from ${filename}.`);
    trashFile(fullFilePath);
    return {
      success: false,
      reason: `Failed to parse content from ${filename}.`,
      documents: [],
    };
  }

  if (!transcriber) {
    console.error(`Failed to load transcription model for ${filename}.`);
    trashFile(fullFilePath);
    return {
      success: false,
      reason: `Failed to load transcription model for ${filename}.`,
      documents: [],
    };
  }

  console.log(`[Model Working]: Transcribing audio data to text`);
  const { text: content } = await transcriber(audioData, {
    chunk_length_s: 30,
    stride_length_s: 5,
  });

  // Guard against a missing `text` field as well as an empty transcript.
  if (!content || !content.length) {
    console.error(`Resulting text content was empty for ${filename}.`);
    trashFile(fullFilePath);
    return {
      success: false,
      reason: `No text content found in ${filename}.`,
      documents: [],
    };
  }

  const data = {
    id: v4(),
    url: `file://${fullFilePath}`,
    title: filename,
    docAuthor: "no author found",
    description: "No description found.",
    docSource: "audio file uploaded by the user.",
    chunkSource: "",
    published: createdDate(fullFilePath),
    wordCount: content.split(" ").length,
    pageContent: content,
    token_count_estimate: tokenizeString(content).length,
  };

  const document = writeToServerDocuments(
    data,
    `${slugify(filename)}-${data.id}`
  );
  trashFile(fullFilePath);
  console.log(
    `[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`
  );
  return { success: true, reason: null, documents: [document] };
}

/**
 * Resolves the transcription callable from a Whisper provider.
 *
 * @param {{client: Function}} whisper - Provider whose `client()` yields the transcriber.
 * @returns {Promise<(Function|null)>} The transcriber, or null when loading failed.
 */
async function loadTranscriber(whisper) {
  try {
    return await whisper.client();
  } catch (error) {
    console.error(`loadTranscriber`, error);
    return null;
  }
}

/**
 * Runs ffmpeg to convert `sourcePath` into a .wav file at `outputFile`.
 *
 * @param {string} sourcePath - File to convert.
 * @param {string} outputFile - Destination path of the .wav file.
 * @returns {Promise<boolean>} true when ffmpeg emitted `end`, false when it emitted `error`.
 */
function convertFileToWav(sourcePath, outputFile) {
  const ffmpeg = require("fluent-ffmpeg");
  // fluent-ffmpeg reports completion through events, so adapt it to a promise.
  return new Promise((resolve) => {
    ffmpeg(sourcePath)
      .toFormat("wav")
      .on("error", (error) => {
        console.error(`[Conversion Error] ${error.message}`);
        resolve(false);
      })
      .on("progress", (progress) =>
        console.log(
          `[Conversion Processing]: ${progress.targetSize}KB converted`
        )
      )
      .on("end", () => {
        console.log("[Conversion Complete]: File converted to .wav!");
        resolve(true);
      })
      .save(outputFile);
  });
}

/**
 * Returns the bytes of `sourcePath` as .wav data, converting through a
 * temporary file in `storage/tmp` when the extension is not `.wav`.
 *
 * @param {string} sourcePath - Audio file to read.
 * @returns {Promise<Buffer>} Raw .wav file contents.
 * @throws {Error} When the conversion fails or a file cannot be read.
 */
async function readWavBuffer(sourcePath) {
  const outFolder = path.resolve(__dirname, `../../storage/tmp`);
  if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });

  const fileExtension = path.extname(sourcePath).toLowerCase();
  if (fileExtension === ".wav") return fs.promises.readFile(sourcePath);

  console.log(
    `[Conversion Required] ${fileExtension} file detected - converting to .wav`
  );
  const outputFile = path.resolve(outFolder, `${v4()}.wav`);
  try {
    const success = await convertFileToWav(sourcePath, outputFile);
    if (!success)
      throw new Error(
        "[Conversion Failed]: Could not convert file to .wav format!"
      );
    return await fs.promises.readFile(outputFile);
  } finally {
    // Always remove the temporary file, including a partial one left behind
    // by a failed conversion. `force` ignores a file that was never created.
    fs.rmSync(outputFile, { force: true });
  }
}

/**
 * Loads an audio file and returns its samples as 32-bit float data
 * resampled to 16000 Hz, reduced to a single channel.
 *
 * @param {string} sourcePath - Audio file to decode.
 * @returns {Promise<*>} The sample data from `wavefile`, or null when any
 *   step fails (the error is logged).
 */
async function convertToWavAudioData(sourcePath) {
  try {
    const wavefile = require("wavefile");
    const buffer = await readWavBuffer(sourcePath);

    const wavFile = new wavefile.WaveFile(buffer);
    wavFile.toBitDepth("32f");
    wavFile.toSampleRate(16000);

    let audioData = wavFile.getSamples();
    if (Array.isArray(audioData)) {
      if (audioData.length > 1) {
        const SCALING_FACTOR = Math.sqrt(2);

        // Merge channels into first channel to save memory
        for (let i = 0; i < audioData[0].length; ++i) {
          audioData[0][i] =
            (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
        }
      }
      audioData = audioData[0];
    }
    return audioData;
  } catch (error) {
    console.error(`convertToWavAudioData`, error);
    return null;
  }
}

module.exports = asAudio;