anything-llm/collector/utils/files/index.js
Timothy Carambat 719521c307
Document Processor v2 (#442)
* wip: init refactor of document processor to JS

* add NodeJs PDF support

* wip: parity with python processor
feat: add pptx support

* fix: forgot files

* Remove python scripts totally

* wip:update docker to boot new collector

* add package.json support

* update dockerfile for new build

* update gitignore and linting

* add more protections on file lookup

* update package.json

* test build

* update docker commands to use cap-add=SYS_ADMIN so web scraper can run
update all scripts to reflect this
remove docker build for branch
2023-12-14 15:14:56 -08:00

56 lines
1.2 KiB
JavaScript

const fs = require("fs");
const path = require("path");
/**
 * Delete a single file from disk.
 *
 * Nothing happens when the path does not exist, when it refers to a
 * directory, or when it cannot be stat'ed. Errors raised by the removal
 * itself are not caught and propagate to the caller.
 *
 * @param {string} filepath - Path of the file to remove.
 * @returns {void}
 */
function trashFile(filepath) {
  let shouldRemove = false;

  if (fs.existsSync(filepath)) {
    try {
      // lstat (not stat) so a symlink is judged on itself, not its target.
      shouldRemove = !fs.lstatSync(filepath).isDirectory();
    } catch {
      // A failed stat is treated the same as "nothing to delete".
      shouldRemove = false;
    }
  }

  if (shouldRemove) fs.rmSync(filepath);
}
/**
 * Report a file's creation (birth) time as a locale-formatted string.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @returns {string} `birthtime.toLocaleString()` for the file, or the
 *   literal "unknown" when the file cannot be stat'ed or its reported
 *   birth time is 0 (treated as an invalid stat).
 */
function createdDate(filepath) {
  const fallback = "unknown";
  try {
    const stats = fs.statSync(filepath);
    // A zero birth time is treated as an invalid stat.
    return stats.birthtimeMs === 0
      ? fallback
      : stats.birthtime.toLocaleString();
  } catch {
    return fallback;
  }
}
/**
 * Serialize `data` as pretty-printed JSON (4-space indent) and write it to
 * `<destination>/<filename>.json`.
 *
 * The destination directory is created (recursively) when it does not
 * already exist. Without an override, the destination is
 * `../../../server/storage/documents/custom-documents` resolved relative
 * to this module's directory.
 *
 * @param {object} [data={}] - Value to serialize into the JSON file.
 * @param {string} filename - Output name without the ".json" extension;
 *   resolved against the destination directory.
 * @param {string|null} [destinationOverride=null] - Alternate output
 *   directory; used instead of the default when truthy.
 * @returns {void}
 */
function writeToServerDocuments(
  data = {},
  filename,
  destinationOverride = null
) {
  let destination;
  if (destinationOverride) {
    destination = path.resolve(destinationOverride);
  } else {
    destination = path.resolve(
      __dirname,
      "../../../server/storage/documents/custom-documents"
    );
  }

  const destinationExists = fs.existsSync(destination);
  if (!destinationExists) {
    fs.mkdirSync(destination, { recursive: true });
  }

  const outputPath = `${path.resolve(destination, filename)}.json`;
  const serialized = JSON.stringify(data, null, 4);
  fs.writeFileSync(outputPath, serialized, { encoding: "utf-8" });
}
// Public API of this module: the file helpers defined above.
module.exports = {
trashFile,
createdDate,
writeToServerDocuments,
};