anything-llm/collector/utils/files/index.js
Timothy Carambat 61db981017
feat: Embed on-instance Whisper model for audio/mp4 transcribing (#449)
* feat: Embed on-instance Whisper model for audio/mp4 transcribing
resolves #329

* additional logging

* add placeholder for tmp folder in collector storage
Add cleanup of hotdir and tmp on collector boot to prevent hanging files
split loading of model and file conversion into concurrency

* update README

* update model size

* update supported filetypes
2023-12-15 11:20:13 -08:00

96 lines
2.3 KiB
JavaScript

const fs = require("fs");
const path = require("path");
/**
 * Delete a single file from disk if it is present.
 *
 * Does nothing when the path does not exist, when it refers to a directory,
 * or when it cannot be inspected. Errors raised by the removal itself are
 * not caught and propagate to the caller.
 *
 * @param {string} filepath - Path of the file to remove.
 * @returns {void}
 */
function trashFile(filepath) {
  const isPresent = fs.existsSync(filepath);
  if (!isPresent) return;

  let pointsToDirectory = false;
  try {
    // lstat (not stat) so a symlink is judged by the link itself.
    pointsToDirectory = fs.lstatSync(filepath).isDirectory();
  } catch {
    return;
  }

  if (pointsToDirectory) return;
  fs.rmSync(filepath);
}
/**
 * Report when a file was created, as a locale-formatted string.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @returns {string} The file's birthtime via `Date#toLocaleString()`, or
 *   "unknown" when the file cannot be stat'ed or its birthtime is reported
 *   as 0 ms.
 */
function createdDate(filepath) {
  const fallback = "unknown";
  try {
    const stats = fs.statSync(filepath);
    // A birthtime of 0 ms is treated as "no usable creation time".
    return stats.birthtimeMs === 0
      ? fallback
      : stats.birthtime.toLocaleString();
  } catch {
    return fallback;
  }
}
/**
 * Serialize `data` as pretty-printed JSON (4-space indent) and write it to
 * `<destination>/<filename>.json`, creating the destination folder first if
 * it does not exist yet.
 *
 * @param {object} [data={}] - Value to serialize with `JSON.stringify`.
 * @param {string} filename - Output file name; ".json" is appended to it.
 * @param {string|null} [destinationOverride=null] - Folder to write into.
 *   When falsy, the server's `storage/documents/custom-documents` folder
 *   (resolved relative to this file) is used.
 * @returns {void}
 */
function writeToServerDocuments(
  data = {},
  filename,
  destinationOverride = null
) {
  let destination;
  if (destinationOverride) {
    destination = path.resolve(destinationOverride);
  } else {
    destination = path.resolve(
      __dirname,
      "../../../server/storage/documents/custom-documents"
    );
  }

  const destinationExists = fs.existsSync(destination);
  if (!destinationExists) fs.mkdirSync(destination, { recursive: true });

  const outputPath = path.resolve(destination, filename) + ".json";
  const serialized = JSON.stringify(data, null, 4);
  fs.writeFileSync(outputPath, serialized, { encoding: "utf-8" });
}
/**
 * Remove every entry directly inside `directory` except `keepFile`.
 *
 * Best-effort: a directory that cannot be listed (missing, unreadable) is
 * treated as having nothing to clean, and entries that fail to delete are
 * skipped. Sub-directories fall in that second group because `fs.rmSync` is
 * called without `recursive`.
 *
 * @param {string} directory - Absolute path of the folder to clean.
 * @param {string} keepFile - Name of the placeholder file to leave in place.
 * @returns {Promise<void>}
 */
async function removeDirectoryFiles(directory, keepFile) {
  let files;
  try {
    files = await fs.promises.readdir(directory);
  } catch {
    // Nothing to iterate; bail out instead of looping over `undefined`.
    return;
  }

  for (const file of files) {
    if (file === keepFile) continue;
    try {
      fs.rmSync(path.join(directory, file));
    } catch {
      // Deliberately ignored: one stuck entry must not stop the cleanup.
    }
  }
}

/**
 * Wipe the collector's hot directory and tmp storage.
 *
 * Files can be left behind when processing a large file fails and it cannot
 * be removed at that time; calling this (e.g. on boot) clears them out. The
 * `__HOTDIR__.md` and `.placeholder` marker files are preserved. Both folders
 * are cleaned concurrently, and a missing folder is not an error.
 *
 * @returns {Promise<void>} Resolves once both folders have been processed.
 */
async function wipeCollectorStorage() {
  const hotDirectory = path.resolve(__dirname, "../../hotdir");
  const tmpDirectory = path.resolve(__dirname, "../../storage/tmp");

  await Promise.all([
    removeDirectoryFiles(hotDirectory, "__HOTDIR__.md"),
    removeDirectoryFiles(tmpDirectory, ".placeholder"),
  ]);
  console.log(`Collector hot directory and tmp storage wiped!`);
}
// Public API of this module: file-system helpers defined above.
module.exports = {
trashFile,
createdDate,
writeToServerDocuments,
wipeCollectorStorage,
};