anything-llm/server/endpoints/api/document/index.js
Timothy Carambat b35feede87
570 document api return object (#608)
* Add support for fetching single document in documents folder

* Add document object to upload + support link scraping via API

* hotfixes for documentation

* update api docs
2024-01-16 16:04:22 -08:00

366 lines
10 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const { Telemetry } = require("../../../models/telemetry");
const { validApiKey } = require("../../../utils/middleware/validApiKey");
const { setupMulter } = require("../../../utils/files/multer");
const {
checkProcessorAlive,
acceptedFileTypes,
processDocument,
processLink,
} = require("../../../utils/files/documentProcessor");
const {
viewLocalFiles,
findDocumentInDocuments,
} = require("../../../utils/files");
const { reqBody } = require("../../../utils/http");
const { handleUploads } = setupMulter();
function apiDocumentEndpoints(app) {
if (!app) return;
app.post(
"/v1/document/upload",
[validApiKey],
handleUploads.single("file"),
async (request, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.'
#swagger.requestBody = {
description: 'File to be uploaded.',
required: true,
type: 'file',
content: {
"multipart/form-data": {
schema: {
type: 'object',
properties: {
file: {
type: 'string',
format: 'binary',
}
}
}
}
}
}
#swagger.responses[200] = {
content: {
"application/json": {
schema: {
type: 'object',
example: {
success: true,
error: null,
documents: [
{
"location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
"name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
"url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
"title": "anythingllm.txt",
"docAuthor": "Unknown",
"description": "Unknown",
"docSource": "a text file uploaded by the user.",
"chunkSource": "anythingllm.txt",
"published": "1/16/2024, 3:07:00PM",
"wordCount": 93,
"token_count_estimate": 115,
}
]
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
}
}
*/
try {
const { originalname } = request.file;
const processingOnline = await checkProcessorAlive();
if (!processingOnline) {
response
.status(500)
.json({
success: false,
error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`,
})
.end();
}
const { success, reason, documents } =
await processDocument(originalname);
if (!success) {
response
.status(500)
.json({ success: false, error: reason, documents })
.end();
return;
}
console.log(
`Document ${originalname} uploaded processed and successfully. It is now available in documents.`
);
await Telemetry.sendTelemetry("document_uploaded");
response.status(200).json({ success: true, error: null, documents });
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
}
}
);
app.post(
"/v1/document/upload-link",
[validApiKey],
async (request, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
#swagger.requestBody = {
description: 'Link of web address to be scraped.',
required: true,
type: 'file',
content: {
"application/json": {
schema: {
type: 'object',
example: {
"link": "https://useanything.com"
}
}
}
}
}
#swagger.responses[200] = {
content: {
"application/json": {
schema: {
type: 'object',
example: {
success: true,
error: null,
documents: [
{
"id": "c530dbe6-bff1-4b9e-b87f-710d539d20bc",
"url": "file://useanything_com.html",
"title": "useanything_com.html",
"docAuthor": "no author found",
"description": "No description found.",
"docSource": "URL link uploaded by the user.",
"chunkSource": "https:useanything.com.html",
"published": "1/16/2024, 3:46:33PM",
"wordCount": 252,
"pageContent": "AnythingLLM is the best....",
"token_count_estimate": 447,
"location": "custom-documents/url-useanything_com-c530dbe6-bff1-4b9e-b87f-710d539d20bc.json"
}
]
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
}
}
*/
try {
const { link } = reqBody(request);
const processingOnline = await checkProcessorAlive();
if (!processingOnline) {
response
.status(500)
.json({
success: false,
error: `Document processing API is not online. Link ${link} will not be processed automatically.`,
})
.end();
}
const { success, reason, documents } = await processLink(link);
if (!success) {
response
.status(500)
.json({ success: false, error: reason, documents })
.end();
return;
}
console.log(
`Link ${link} uploaded processed and successfully. It is now available in documents.`
);
await Telemetry.sendTelemetry("document_uploaded");
response.status(200).json({ success: true, error: null, documents });
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
}
}
);
app.get("/v1/documents", [validApiKey], async (_, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'List of all locally-stored documents in instance'
#swagger.responses[200] = {
content: {
"application/json": {
schema: {
type: 'object',
example: {
"localFiles": {
"name": "documents",
"type": "folder",
items: [
{
"name": "my-stored-document.json",
"type": "file",
"id": "bb07c334-4dab-4419-9462-9d00065a49a1",
"url": "file://my-stored-document.txt",
"title": "my-stored-document.txt",
"cached": false
},
]
}
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
}
}
*/
try {
const localFiles = await viewLocalFiles();
response.status(200).json({ localFiles });
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
}
});
app.get("/v1/document/:docName", [validApiKey], async (request, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'Get a single document by its unique AnythingLLM document name'
#swagger.parameters['docName'] = {
in: 'path',
description: 'Unique document name to find (name in /documents)',
required: true,
type: 'string'
}
#swagger.responses[200] = {
content: {
"application/json": {
schema: {
type: 'object',
example: {
"localFiles": {
"name": "documents",
"type": "folder",
items: [
{
"name": "my-stored-document.txt-uuid1234.json",
"type": "file",
"id": "bb07c334-4dab-4419-9462-9d00065a49a1",
"url": "file://my-stored-document.txt",
"title": "my-stored-document.txt",
"cached": false
},
]
}
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
}
}
*/
try {
const { docName } = request.params;
const document = await findDocumentInDocuments(docName);
if (!document) {
response.sendStatus(404).end();
return;
}
response.status(200).json({ document });
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
}
});
app.get(
"/v1/document/accepted-file-types",
[validApiKey],
async (_, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'Check available filetypes and MIMEs that can be uploaded.'
#swagger.responses[200] = {
content: {
"application/json": {
schema: {
type: 'object',
example: {
"types": {
"application/mbox": [
".mbox"
],
"application/pdf": [
".pdf"
],
"application/vnd.oasis.opendocument.text": [
".odt"
],
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": [
".docx"
],
"text/plain": [
".txt",
".md"
]
}
}
}
}
}
}
#swagger.responses[403] = {
schema: {
"$ref": "#/definitions/InvalidAPIKey"
}
}
*/
try {
const types = await acceptedFileTypes();
if (!types) {
response.sendStatus(404).end();
return;
}
response.status(200).json({ types });
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
}
}
);
}
module.exports = { apiDocumentEndpoints };