anything-llm/server/endpoints/api/document/index.js
Sean Hatfield 4f268dfeb6
[FEAT] Document picker folders for organization (#902)
* implement alternating color rows for file picker

* implement alternating colors for workspace directory

* remove unneeded props/variables

* remove unused border classes

* WIP new folder UI

* remove unneeded expanded prop from filerow component

* folder creation UI and files object manipulation WIP

* folder creation & moving files complete

* add developer API support for creating folders and moving files

* update alternating row css for file picker

* remove unneeded props from merge

* normalize paths for folders
priority to custom docs folder
silently fail on duplicate folders

* update folder icon to custom svg

* linting and move FolderIcon to JSX

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
2024-03-20 16:10:30 -07:00

737 lines
21 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const { Telemetry } = require("../../../models/telemetry");
const { validApiKey } = require("../../../utils/middleware/validApiKey");
const { setupMulter } = require("../../../utils/files/multer");
const {
viewLocalFiles,
findDocumentInDocuments,
normalizePath,
} = require("../../../utils/files");
const { reqBody } = require("../../../utils/http");
const { EventLogs } = require("../../../models/eventLogs");
const { CollectorApi } = require("../../../utils/collectorApi");
const { handleUploads } = setupMulter();
const fs = require("fs");
const path = require("path");
const { Document } = require("../../../models/documents");
function apiDocumentEndpoints(app) {
if (!app) return;
app.post(
  "/v1/document/upload",
  [validApiKey],
  handleUploads.single("file"),
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.'
    #swagger.requestBody = {
    description: 'File to be uploaded.',
    required: true,
    type: 'file',
    content: {
    "multipart/form-data": {
    schema: {
    type: 'object',
    properties: {
    file: {
    type: 'string',
    format: 'binary',
    }
    }
    }
    }
    }
    }
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: true,
    error: null,
    documents: [
    {
    "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
    "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
    "url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
    "title": "anythingllm.txt",
    "docAuthor": "Unknown",
    "description": "Unknown",
    "docSource": "a text file uploaded by the user.",
    "chunkSource": "anythingllm.txt",
    "published": "1/16/2024, 3:07:00PM",
    "wordCount": 93,
    "token_count_estimate": 115,
    }
    ]
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Flow: confirm a file was received -> confirm the collector is online ->
    // ask the collector to process the file by its original name -> record
    // telemetry and an event log entry -> reply with the processed documents.
    // Any unexpected exception is logged and answered with a bare 500.
    try {
      const Collector = new CollectorApi();

      // `request.file` is set by the upload middleware registered above. When
      // the request carries no `file` part it is presumably left undefined
      // (standard multer `.single()` behaviour - confirm against setupMulter),
      // which previously crashed the destructuring below into an opaque 500.
      if (!request.file) {
        response
          .status(400)
          .json({
            success: false,
            error:
              "No file was provided. Send the document as the multipart form field 'file'.",
          })
          .end();
        return;
      }

      const { originalname } = request.file;
      const processingOnline = await Collector.online();
      if (!processingOnline) {
        response
          .status(500)
          .json({
            success: false,
            error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`,
          })
          .end();
        return;
      }

      const { success, reason, documents } =
        await Collector.processDocument(originalname);
      if (!success) {
        // Forward the collector's own failure reason to the API caller.
        response
          .status(500)
          .json({ success: false, error: reason, documents })
          .end();
        return;
      }

      Collector.log(
        `Document ${originalname} uploaded processed and successfully. It is now available in documents.`
      );
      await Telemetry.sendTelemetry("document_uploaded");
      await EventLogs.logEvent("api_document_uploaded", {
        documentName: originalname,
      });
      response.status(200).json({ success: true, error: null, documents });
    } catch (e) {
      console.log(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
app.post(
  "/v1/document/upload-link",
  [validApiKey],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
    #swagger.requestBody = {
    description: 'Link of web address to be scraped.',
    required: true,
    type: 'object',
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    "link": "https://useanything.com"
    }
    }
    }
    }
    }
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: true,
    error: null,
    documents: [
    {
    "id": "c530dbe6-bff1-4b9e-b87f-710d539d20bc",
    "url": "file://useanything_com.html",
    "title": "useanything_com.html",
    "docAuthor": "no author found",
    "description": "No description found.",
    "docSource": "URL link uploaded by the user.",
    "chunkSource": "https:useanything.com.html",
    "published": "1/16/2024, 3:46:33PM",
    "wordCount": 252,
    "pageContent": "AnythingLLM is the best....",
    "token_count_estimate": 447,
    "location": "custom-documents/url-useanything_com-c530dbe6-bff1-4b9e-b87f-710d539d20bc.json"
    }
    ]
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Reads `link` from the request body, hands it to the collector for
    // processing and replies with the resulting documents. Replies 500 with a
    // JSON error when the collector is offline or reports a failure, and with
    // a bare 500 when anything throws.
    try {
      const collector = new CollectorApi();
      const { link } = reqBody(request);

      const collectorIsOnline = await collector.online();
      if (!collectorIsOnline) {
        const offlinePayload = {
          success: false,
          error: `Document processing API is not online. Link ${link} will not be processed automatically.`,
        };
        response.status(500).json(offlinePayload).end();
        return;
      }

      const outcome = await collector.processLink(link);
      const { success, reason, documents } = outcome;
      if (!success) {
        const failurePayload = { success: false, error: reason, documents };
        response.status(500).json(failurePayload).end();
        return;
      }

      collector.log(
        `Link ${link} uploaded processed and successfully. It is now available in documents.`
      );
      await Telemetry.sendTelemetry("link_uploaded");
      await EventLogs.logEvent("api_link_uploaded", { link });

      const successPayload = { success: true, error: null, documents };
      response.status(200).json(successPayload);
    } catch (e) {
      console.log(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
app.post(
  "/v1/document/raw-text",
  [validApiKey],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Upload a file by specifying its raw text content and metadata values without having to upload a file.'
    #swagger.requestBody = {
    description: 'Text content and metadata of the file to be saved to the system. Use metadata-schema endpoint to get the possible metadata keys',
    required: true,
    type: 'object',
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    "textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
    "metadata": {
    keyOne: "valueOne",
    keyTwo: "valueTwo",
    etc: "etc"
    }
    }
    }
    }
    }
    }
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: true,
    error: null,
    documents: [
    {
    "id": "c530dbe6-bff1-4b9e-b87f-710d539d20bc",
    "url": "file://my-document.txt",
    "title": "hello-world.txt",
    "docAuthor": "no author found",
    "description": "No description found.",
    "docSource": "My custom description set during upload",
    "chunkSource": "no chunk source specified",
    "published": "1/16/2024, 3:46:33PM",
    "wordCount": 252,
    "pageContent": "AnythingLLM is the best....",
    "token_count_estimate": 447,
    "location": "custom-documents/raw-my-doc-text-c530dbe6-bff1-4b9e-b87f-710d539d20bc.json"
    }
    ]
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Creates a document from `textContent` + `metadata` in the request body.
    // Checks run in this order: collector online (500), every required
    // metadata key present with a truthy value (422), non-empty text (422).
    // The collector's own failure is reported as 500 with its reason.
    try {
      const collector = new CollectorApi();
      const requiredMetadata = ["title"];
      const { textContent, metadata = {} } = reqBody(request);

      const collectorIsOnline = await collector.online();
      if (!collectorIsOnline) {
        response
          .status(500)
          .json({
            success: false,
            error: `Document processing API is not online. Request will not be processed.`,
          })
          .end();
        return;
      }

      // A required key counts as missing when it is absent from the object's
      // own enumerable keys or when its value is falsy.
      const isMissingRequiredKey = requiredMetadata.some(
        (requiredKey) =>
          !Object.keys(metadata).includes(requiredKey) ||
          !metadata[requiredKey]
      );
      if (isMissingRequiredKey) {
        const quotedKeys = requiredMetadata
          .map((key) => `'${key}'`)
          .join(", ");
        response
          .status(422)
          .json({
            success: false,
            error: `You are missing required metadata key:value pairs in your request. Required metadata key:values are ${quotedKeys}`,
          })
          .end();
        return;
      }

      const textIsEmpty = !textContent || textContent?.length === 0;
      if (textIsEmpty) {
        response
          .status(422)
          .json({
            success: false,
            error: `The 'textContent' key cannot have an empty value.`,
          })
          .end();
        return;
      }

      const outcome = await collector.processRawText(textContent, metadata);
      const { success, reason, documents } = outcome;
      if (!success) {
        response
          .status(500)
          .json({ success: false, error: reason, documents })
          .end();
        return;
      }

      collector.log(
        `Document created successfully. It is now available in documents.`
      );
      await Telemetry.sendTelemetry("raw_document_uploaded");
      await EventLogs.logEvent("api_raw_document_uploaded");
      response.status(200).json({ success: true, error: null, documents });
    } catch (e) {
      console.log(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
app.get("/v1/documents", [validApiKey], async (_, response) => {
  /*
  #swagger.tags = ['Documents']
  #swagger.description = 'List of all locally-stored documents in instance'
  #swagger.responses[200] = {
  content: {
  "application/json": {
  schema: {
  type: 'object',
  example: {
  "localFiles": {
  "name": "documents",
  "type": "folder",
  items: [
  {
  "name": "my-stored-document.json",
  "type": "file",
  "id": "bb07c334-4dab-4419-9462-9d00065a49a1",
  "url": "file://my-stored-document.txt",
  "title": "my-stored-document.txt",
  "cached": false
  },
  ]
  }
  }
  }
  }
  }
  }
  #swagger.responses[403] = {
  schema: {
  "$ref": "#/definitions/InvalidAPIKey"
  }
  }
  */
  // Replies with whatever `viewLocalFiles()` resolves to, wrapped under the
  // `localFiles` key. Any exception is logged and answered with a bare 500.
  try {
    const listing = await viewLocalFiles();
    const payload = { localFiles: listing };
    response.status(200).json(payload);
  } catch (e) {
    console.log(e.message, e);
    response.sendStatus(500).end();
  }
});
app.get(
  "/v1/document/accepted-file-types",
  [validApiKey],
  async (_, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Check available filetypes and MIMEs that can be uploaded.'
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    "types": {
    "application/mbox": [
    ".mbox"
    ],
    "application/pdf": [
    ".pdf"
    ],
    "application/vnd.oasis.opendocument.text": [
    ".odt"
    ],
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": [
    ".docx"
    ],
    "text/plain": [
    ".txt",
    ".md"
    ]
    }
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Asks the collector which file types it accepts. A falsy answer is
    // reported as 404; otherwise the value is returned under `types`.
    try {
      const collector = new CollectorApi();
      const acceptedTypes = await collector.acceptedFileTypes();
      if (acceptedTypes) {
        response.status(200).json({ types: acceptedTypes });
        return;
      }
      response.sendStatus(404).end();
    } catch (e) {
      console.log(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
app.get(
  "/v1/document/metadata-schema",
  [validApiKey],
  async (_, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Get the known available metadata schema for when doing a raw-text upload and the acceptable type of value for each key.'
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    "schema": {
    "keyOne": "string | number | nullable",
    "keyTwo": "string | number | nullable",
    "specialKey": "number",
    "title": "string",
    }
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Returns a static description of the metadata keys accepted by the
    // raw-text upload endpoint and the value type expected for each.
    try {
      // Keep in sync with the collector: when this list changes, the
      // METADATA_KEYS constant in /processRawText must change as well.
      const metadataSchema = {
        url: "string | nullable",
        title: "string",
        docAuthor: "string | nullable",
        description: "string | nullable",
        docSource: "string | nullable",
        chunkSource: "string | nullable",
        published: "epoch timestamp in ms | nullable",
      };
      response.status(200).json({ schema: metadataSchema });
    } catch (e) {
      console.log(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
// Route ordering matters: this parameterised GET must be registered after every
// other GET under /v1/document/, otherwise `:docName` would capture them.
app.get("/v1/document/:docName", [validApiKey], async (request, response) => {
  /*
  #swagger.tags = ['Documents']
  #swagger.description = 'Get a single document by its unique AnythingLLM document name'
  #swagger.parameters['docName'] = {
  in: 'path',
  description: 'Unique document name to find (name in /documents)',
  required: true,
  type: 'string'
  }
  #swagger.responses[200] = {
  content: {
  "application/json": {
  schema: {
  type: 'object',
  example: {
  "localFiles": {
  "name": "documents",
  "type": "folder",
  items: [
  {
  "name": "my-stored-document.txt-uuid1234.json",
  "type": "file",
  "id": "bb07c334-4dab-4419-9462-9d00065a49a1",
  "url": "file://my-stored-document.txt",
  "title": "my-stored-document.txt",
  "cached": false
  },
  ]
  }
  }
  }
  }
  }
  }
  #swagger.responses[403] = {
  schema: {
  "$ref": "#/definitions/InvalidAPIKey"
  }
  }
  */
  // Looks the document up by the `docName` path parameter. A falsy lookup
  // result is answered with 404; a hit is returned under the `document` key.
  try {
    const requestedName = request.params.docName;
    const match = await findDocumentInDocuments(requestedName);
    if (match) {
      response.status(200).json({ document: match });
      return;
    }
    response.sendStatus(404).end();
  } catch (e) {
    console.log(e.message, e);
    response.sendStatus(500).end();
  }
});
app.post(
  "/v1/document/create-folder",
  [validApiKey],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Create a new folder inside the documents storage directory.'
    #swagger.requestBody = {
    description: 'Name of the folder to create.',
    required: true,
    type: 'object',
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    "name": "new-folder"
    }
    }
    }
    }
    }
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: true,
    message: null
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Creates `<documents root>/<normalized name>`.
    //   422 - `name` is missing / not a non-empty string, or resolves outside
    //         the documents root.
    //   500 - the target already exists, or the filesystem call throws.
    //   200 - folder created.
    try {
      const { name } = reqBody(request);

      // Reject bad input up front instead of letting a non-string reach the
      // path helpers and surface as an opaque 500.
      if (typeof name !== "string" || name.trim().length === 0) {
        response.status(422).json({
          success: false,
          message: "The 'name' key must be a non-empty string.",
        });
        return;
      }

      const documentsRoot = path.resolve(
        __dirname,
        "../../../storage/documents"
      );
      const storagePath = path.join(documentsRoot, normalizePath(name));

      // Defense in depth: `normalizePath` is defined elsewhere, so verify here
      // that the final path did not climb out of the documents root. A
      // relative path of ".." / "../x" (or an absolute one, e.g. another
      // drive on Windows) means the target is outside the root.
      const relativeToRoot = path.relative(documentsRoot, storagePath);
      const escapesRoot =
        relativeToRoot === ".." ||
        relativeToRoot.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeToRoot);
      if (escapesRoot) {
        response.status(422).json({
          success: false,
          message: "Invalid folder name.",
        });
        return;
      }

      if (fs.existsSync(storagePath)) {
        response.status(500).json({
          success: false,
          message: "Folder by that name already exists",
        });
        return;
      }

      // `recursive` also creates any missing parent directories.
      fs.mkdirSync(storagePath, { recursive: true });
      response.status(200).json({ success: true, message: null });
    } catch (e) {
      console.error(e);
      response.status(500).json({
        success: false,
        message: `Failed to create folder: ${e.message}`,
      });
    }
  }
);
app.post(
  "/v1/document/move-files",
  [validApiKey],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Move files within the documents storage directory.'
    #swagger.requestBody = {
    description: 'Array of objects containing source and destination paths of files to move.',
    required: true,
    type: 'object',
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    "files": [
    {
    "from": "custom-documents/file.txt-fc4beeeb-e436-454d-8bb4-e5b8979cb48f.json",
    "to": "folder/file.txt-fc4beeeb-e436-454d-8bb4-e5b8979cb48f.json"
    }
    ]
    }
    }
    }
    }
    }
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: true,
    message: null
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    */
    // Moves each `{ from, to }` pair inside the documents root.
    // Files whose `from` matches the `docpath` of a Document record are
    // skipped and only counted in the response message.
    //   422 - `files` is not an array of `{ from, to }` strings, or a path
    //         resolves outside the documents root (nothing is moved).
    //   500 - at least one rename failed, or an unexpected error occurred.
    //   200 - all moveable files were moved; `message` reports skipped files.
    try {
      const { files } = reqBody(request);

      const isValidEntry = (entry) =>
        typeof entry?.from === "string" && typeof entry?.to === "string";
      if (!Array.isArray(files) || !files.every(isValidEntry)) {
        response.status(422).json({
          success: false,
          message:
            "The 'files' key must be an array of { from, to } path strings.",
        });
        return;
      }

      const docpaths = files.map(({ from }) => from);
      const documents = await Document.where({ docpath: { in: docpaths } });
      // Set lookup keeps the filter below linear in the number of files.
      const embeddedDocpaths = new Set(documents.map((doc) => doc.docpath));
      const moveableFiles = files.filter(
        ({ from }) => !embeddedDocpaths.has(from)
      );

      const documentsRoot = path.resolve(
        __dirname,
        "../../../storage/documents"
      );
      // True only for paths strictly below the documents root. The root
      // itself (""), a parent ("..", "../x") or an absolute relative result
      // (different drive on Windows) are all rejected.
      const isInsideRoot = (targetPath) => {
        const relativeToRoot = path.relative(documentsRoot, targetPath);
        return (
          relativeToRoot !== "" &&
          relativeToRoot !== ".." &&
          !relativeToRoot.startsWith(`..${path.sep}`) &&
          !path.isAbsolute(relativeToRoot)
        );
      };

      const plannedMoves = moveableFiles.map(({ from, to }) => ({
        from,
        to,
        sourcePath: path.join(documentsRoot, normalizePath(from)),
        destinationPath: path.join(documentsRoot, normalizePath(to)),
      }));

      // Validate every path before touching the disk so that a bad entry
      // cannot leave the request half-applied.
      const hasInvalidLocation = plannedMoves.some(
        ({ sourcePath, destinationPath }) =>
          !isInsideRoot(sourcePath) || !isInsideRoot(destinationPath)
      );
      if (hasInvalidLocation) {
        response.status(422).json({
          success: false,
          message: "Invalid file location.",
        });
        return;
      }

      // allSettled waits for every rename, so the reply is only sent once no
      // move is still in flight and every individual failure can be logged.
      const outcomes = await Promise.allSettled(
        plannedMoves.map(({ sourcePath, destinationPath }) =>
          fs.promises.rename(sourcePath, destinationPath)
        )
      );

      let failedCount = 0;
      outcomes.forEach((outcome, index) => {
        if (outcome.status !== "rejected") return;
        failedCount += 1;
        const { from, to } = plannedMoves[index];
        console.error(`Error moving file ${from} to ${to}:`, outcome.reason);
      });

      if (failedCount > 0) {
        response
          .status(500)
          .json({ success: false, message: "Failed to move some files." });
        return;
      }

      const unmovableCount = files.length - moveableFiles.length;
      response.status(200).json({
        success: true,
        message:
          unmovableCount > 0
            ? `${unmovableCount}/${files.length} files not moved. Unembed them from all workspaces.`
            : null,
      });
    } catch (e) {
      console.error(e);
      response
        .status(500)
        .json({ success: false, message: "Failed to move files." });
    }
  }
);
}
module.exports = { apiDocumentEndpoints };