mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-19 12:40:09 +01:00
719521c307
* wip: init refactor of document processor to JS * add NodeJs PDF support * wip: parity with python processor feat: add pptx support * fix: forgot files * Remove python scripts totally * wip:update docker to boot new collector * add package.json support * update dockerfile for new build * update gitignore and linting * add more protections on file lookup * update package.json * test build * update docker commands to use cap-add=SYS_ADMIN so web scraper can run update all scripts to reflect this remove docker build for branch
52 lines
1.3 KiB
JavaScript
52 lines
1.3 KiB
JavaScript
const path = require("path");
|
|
const fs = require("fs");
|
|
const {
|
|
WATCH_DIRECTORY,
|
|
SUPPORTED_FILETYPE_CONVERTERS,
|
|
} = require("../utils/constants");
|
|
const { trashFile } = require("../utils/files");
|
|
|
|
// Filenames that processSingleFile refuses to process.
// Declared with `const` so the list stays module-scoped instead of leaking
// onto the global object as an implicit global.
const RESERVED_FILES = ["__HOTDIR__.md"];
|
|
|
|
async function processSingleFile(targetFilename) {
|
|
const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
|
|
if (RESERVED_FILES.includes(targetFilename))
|
|
return {
|
|
success: false,
|
|
reason: "Filename is a reserved filename and cannot be processed.",
|
|
};
|
|
if (!fs.existsSync(fullFilePath))
|
|
return {
|
|
success: false,
|
|
reason: "File does not exist in upload directory.",
|
|
};
|
|
|
|
const fileExtension = path.extname(fullFilePath).toLowerCase();
|
|
if (!fileExtension) {
|
|
return {
|
|
success: false,
|
|
reason: `No file extension found. This file cannot be processed.`,
|
|
};
|
|
}
|
|
|
|
if (!Object.keys(SUPPORTED_FILETYPE_CONVERTERS).includes(fileExtension)) {
|
|
trashFile(fullFilePath);
|
|
return {
|
|
success: false,
|
|
reason: `File extension ${fileExtension} not supported for parsing.`,
|
|
};
|
|
}
|
|
|
|
const FileTypeProcessor = require(SUPPORTED_FILETYPE_CONVERTERS[
|
|
fileExtension
|
|
]);
|
|
return await FileTypeProcessor({
|
|
fullFilePath,
|
|
filename: targetFilename,
|
|
});
|
|
}
|
|
|
|
// Public API of this module: only the single-file entry point is exported.
module.exports = { processSingleFile };
|