mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-14 18:40:11 +01:00
719521c307
* wip: init refactor of document processor to JS * add NodeJs PDF support * wip: parity with python processor feat: add pptx support * fix: forgot files * Remove python scripts totally * wip:update docker to boot new collector * add package.json support * update dockerfile for new build * update gitignore and linting * add more protections on file lookup * update package.json * test build * update docker commands to use cap-add=SYS_ADMIN so web scraper can run update all scripts to reflect this remove docker build for branch
16 lines
304 B
JavaScript
16 lines
304 B
JavaScript
const { getEncoding } = require("js-tiktoken");
|
|
|
|
/**
 * Tokenize a string with the `cl100k_base` encoding.
 *
 * This is a best-effort helper: it never throws. Any error raised while
 * obtaining the encoder or encoding the input is logged and an empty array
 * is returned instead.
 *
 * @param {string} [input=""] - Text to tokenize. Only `undefined` triggers the default.
 * @returns {Array} The value returned by `encoder.encode(input)` (presumably an
 *   array of token ids), or an empty array when tokenization fails.
 */
function tokenizeString(input = "") {
  try {
    const encoder = getEncoding("cl100k_base");
    // NOTE(review): if `encode` rejects some inputs (e.g. text containing
    // special-token markers), those inputs end up in the fallback below and
    // report zero tokens; confirm that is the desired behavior.
    return encoder.encode(input);
  } catch (e) {
    // Keep the original message, but also surface the underlying cause so a
    // failure can actually be diagnosed from the logs.
    const reason = e instanceof Error ? e.message : e;
    console.error("Could not tokenize string!", reason);
    return [];
  }
}
|
|
|
|
module.exports = {
|
|
tokenizeString,
|
|
};
|