anything-llm/collector/utils/files/index.js
Timothy Carambat b35feede87
570 document api return object (#608)
* Add support for fetching single document in documents folder

* Add document object to upload + support link scraping via API

* hotfixes for documentation

* update api docs
2024-01-16 16:04:22 -08:00

101 lines
2.6 KiB
JavaScript

const fs = require("fs");
const path = require("path");
/**
 * Delete a single file from disk if it exists.
 *
 * Directories are never removed, and a path that is missing or cannot be
 * stat'ed is silently ignored. Errors raised by the removal itself propagate
 * to the caller.
 *
 * @param {string} filepath - Path of the file to delete.
 * @returns {void}
 */
function trashFile(filepath) {
  if (!fs.existsSync(filepath)) return;

  let stats;
  try {
    stats = fs.lstatSync(filepath);
  } catch {
    // The path could not be inspected - nothing to do.
    return;
  }

  // Only plain (non-directory) entries are eligible for deletion.
  if (stats.isDirectory()) return;

  fs.rmSync(filepath);
}
/**
 * Return the creation (birth) time of a file as a locale-formatted string.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @returns {string} `birthtime.toLocaleString()` for the file, or "unknown"
 *   when the file cannot be stat'ed or reports a birth time of 0.
 */
function createdDate(filepath) {
  const fallback = "unknown";
  try {
    const stats = fs.statSync(filepath);
    // A birth time of exactly 0 is treated as an invalid stat result.
    return stats.birthtimeMs === 0
      ? fallback
      : stats.birthtime.toLocaleString();
  } catch {
    return fallback;
  }
}
/**
 * Serialize a document object to `<filename>.json` inside the destination
 * folder and return the document together with its relative location.
 *
 * @param {object} [data={}] - Document payload; written as 4-space indented JSON.
 * @param {string} filename - Target file name without the ".json" extension.
 * @param {string|null} [destinationOverride=null] - Folder to write into. When
 *   falsy, the server's `storage/documents/custom-documents` folder (resolved
 *   relative to this file) is used.
 * @returns {object} A shallow copy of `data` plus a `location` property of the
 *   form "<folder>/<file>.json".
 * @throws {Error} Any filesystem error raised while creating the folder or
 *   writing the file.
 */
function writeToServerDocuments(
  data = {},
  filename,
  destinationOverride = null
) {
  const destination = destinationOverride
    ? path.resolve(destinationOverride)
    : path.resolve(
        __dirname,
        "../../../server/storage/documents/custom-documents"
      );

  // `recursive: true` is a no-op for an existing folder, so no prior
  // existence check is needed.
  fs.mkdirSync(destination, { recursive: true });

  const destinationFilePath = path.resolve(destination, filename) + ".json";
  fs.writeFileSync(destinationFilePath, JSON.stringify(data, null, 4), {
    encoding: "utf-8",
  });

  return {
    ...data,
    // Relative location string that can be passed into the /update-embeddings api.
    // It is built from the last two path segments (folder + file), which works
    // because only 1-level deep folders are allowed. This still works for
    // integrations like GitHub and YouTube.
    // Split on the platform separator so Windows paths (backslashes) are
    // handled as well, but always join with "/" for the API-facing value.
    location: destinationFilePath.split(path.sep).slice(-2).join("/"),
  };
}
/**
 * Wipe the collector hot directory and tmp storage.
 *
 * Useful when large file failures left behind files that could not be removed
 * at the time; calling this (e.g. on reboot) removes them. The marker files
 * `__HOTDIR__.md` and `.placeholder` are preserved. Removal is best-effort:
 * entries that cannot be deleted (including sub-directories, since the
 * removal is non-recursive) are skipped, and a directory that cannot be read
 * is ignored.
 *
 * @returns {Promise<void>} Resolves once both directories were processed.
 */
async function wipeCollectorStorage() {
  // Remove every entry of `directory` except `keepFile`, never throwing.
  const purgeDirectory = async (directory, keepFile) => {
    let entries;
    try {
      entries = await fs.promises.readdir(directory);
    } catch {
      // Directory is missing or unreadable - nothing to clean. Returning here
      // (instead of falling through) avoids iterating an undefined listing.
      return;
    }

    for (const entry of entries) {
      if (entry === keepFile) continue;
      try {
        fs.rmSync(path.join(directory, entry));
      } catch {
        // Best-effort cleanup: skip entries that cannot be removed.
      }
    }
  };

  await Promise.all([
    purgeDirectory(path.resolve(__dirname, "../../hotdir"), "__HOTDIR__.md"),
    purgeDirectory(path.resolve(__dirname, "../../storage/tmp"), ".placeholder"),
  ]);
  console.log(`Collector hot directory and tmp storage wiped!`);
  return;
}
// Public API of this module: file cleanup, file metadata, and document
// persistence helpers defined above.
module.exports = {
trashFile,
createdDate,
writeToServerDocuments,
wipeCollectorStorage,
};