anything-llm/collector/utils/constants.js
Timothy Carambat 719521c307
Document Processor v2 (#442)
* wip: init refactor of document processor to JS

* add NodeJs PDF support

* wip: parity with python processor
feat: add pptx support

* fix: forgot files

* Remove python scripts totally

* wip: update docker to boot new collector

* add package.json support

* update dockerfile for new build

* update gitignore and linting

* add more protections on file lookup

* update package.json

* test build

* update docker commands to use cap-add=SYS_ADMIN so web scraper can run
update all scripts to reflect this
remove docker build for branch
2023-12-14 15:14:56 -08:00

41 lines
996 B
JavaScript

/**
 * Absolute path of the `hotdir` directory that sits one level above the
 * directory containing this module.
 */
const WATCH_DIRECTORY = require("path").resolve(__dirname, "..", "hotdir");
/**
 * Lookup table keyed by MIME type; each value is the array of file
 * extensions (leading dot included) listed for that type.
 *
 * Written as ordered [mimeType, extensions] pairs so each row reads as one
 * record; the resulting plain object keeps the rows' insertion order.
 */
const ACCEPTED_MIMES = Object.fromEntries([
  ["text/plain", [".txt", ".md"]],
  ["text/html", [".html"]],
  [
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    [".docx"],
  ],
  [
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    [".pptx"],
  ],
  ["application/vnd.oasis.opendocument.text", [".odt"]],
  ["application/vnd.oasis.opendocument.presentation", [".odp"]],
  ["application/pdf", [".pdf"]],
  ["application/mbox", [".mbox"]],
]);
/**
 * Lookup table keyed by file extension (leading dot included); each value is
 * the relative path string of the converter module listed for that extension.
 *
 * The table is declared grouped by converter and then expanded into one
 * extension -> path entry per extension, which makes it easy to see which
 * extensions share a converter. Every extension listed in ACCEPTED_MIMES has
 * an entry here.
 *
 * NOTE(review): the paths are plain relative strings; what they are resolved
 * against is decided by whichever module consumes them — confirm there.
 */
const SUPPORTED_FILETYPE_CONVERTERS = Object.fromEntries(
  [
    ["./convert/asTxt.js", [".txt", ".md", ".html"]],
    ["./convert/asPDF.js", [".pdf"]],
    ["./convert/asDocx.js", [".docx"]],
    ["./convert/asOfficeMime.js", [".pptx", ".odt", ".odp"]],
    ["./convert/asMbox.js", [".mbox"]],
  ].flatMap(([converterPath, extensions]) =>
    extensions.map((extension) => [extension, converterPath])
  )
);
module.exports = {
SUPPORTED_FILETYPE_CONVERTERS,
WATCH_DIRECTORY,
ACCEPTED_MIMES,
};