// anything-llm/server/models/documents.js
//
// Commit a8ec0d9584 — Timothy Carambat, 2023-10-26 10:57:37 -07:00
//   Compensate for upper OpenAI embedding limit chunk size (#292).
//   Limit is due to POST body max size; sufficiently large requests will
//   abort automatically. We should report that error back on the frontend
//   during embedding. Update vector DB providers to return on failure.
//
// 115 lines · 3.1 KiB · JavaScript
const { fileData } = require("../utils/files");
const { v4: uuidv4 } = require("uuid");
const { getVectorDbClass } = require("../utils/helpers");
const prisma = require("../utils/prisma");
const { Telemetry } = require("./telemetry");
/**
 * Data-access model for `workspace_documents` rows, pairing each DB record
 * with its embedding in the configured vector database.
 */
const Document = {
  /**
   * List all documents attached to a workspace.
   * @param {number|null} workspaceId - Primary key of the workspace.
   * @returns {Promise<object[]>} Matching rows; [] when no id is given.
   */
  forWorkspace: async function (workspaceId = null) {
    if (!workspaceId) return [];
    return await prisma.workspace_documents.findMany({
      where: { workspaceId },
    });
  },

  /**
   * Delete every document record matching a Prisma `where` filter.
   * @param {object} clause - Prisma `where` filter.
   * @returns {Promise<boolean>} true on success, false on a DB error.
   */
  delete: async function (clause = {}) {
    try {
      await prisma.workspace_documents.deleteMany({ where: clause });
      return true;
    } catch (error) {
      console.error(error.message);
      return false;
    }
  },

  /**
   * Fetch the first document record matching a Prisma `where` filter.
   * @param {object} clause - Prisma `where` filter.
   * @returns {Promise<object|null>} The row, or null when absent or on error.
   */
  firstWhere: async function (clause = {}) {
    try {
      const document = await prisma.workspace_documents.findFirst({
        where: clause,
      });
      return document || null;
    } catch (error) {
      console.error(error.message);
      return null;
    }
  },

  /**
   * Embed a set of document files into a workspace: vectorize each file,
   * then record it in the DB. Files that fail to vectorize are collected
   * rather than aborting the batch.
   * @param {object} workspace - Workspace row; `id` and `slug` are read.
   * @param {string[]} additions - Storage paths of documents to add.
   * @returns {Promise<{failed: string[], embedded: string[]}>} Paths that
   *   failed to vectorize and paths successfully embedded + recorded.
   */
  addDocuments: async function (workspace, additions = []) {
    const VectorDb = getVectorDbClass();
    // Return the same shape as the non-empty path so callers can
    // destructure `{ failed, embedded }` unconditionally (original
    // returned undefined here, which crashes destructuring callers).
    if (additions.length === 0) return { failed: [], embedded: [] };
    const embedded = [];
    const failedToEmbed = [];

    for (const path of additions) {
      const data = await fileData(path);
      if (!data) continue; // unreadable/missing file — skip silently

      const docId = uuidv4();
      const { pageContent, ...metadata } = data;
      const newDoc = {
        docId,
        // Last path segment is the filename; `.split("/")[1]` was wrong
        // for paths nested deeper than one directory and for bare names.
        filename: path.split("/").pop(),
        docpath: path,
        workspaceId: workspace.id,
        metadata: JSON.stringify(metadata),
      };

      const vectorized = await VectorDb.addDocumentToNamespace(
        workspace.slug,
        { ...data, docId },
        path
      );
      if (!vectorized) {
        // Providers return falsy on failure (e.g. oversized embedding
        // request aborted by POST body limit) — report back to caller.
        console.error("Failed to vectorize", path);
        failedToEmbed.push(path);
        continue;
      }

      try {
        await prisma.workspace_documents.create({ data: newDoc });
        embedded.push(path);
      } catch (error) {
        // Vector data exists but the DB row failed; log and continue.
        console.error(error.message);
      }
    }

    await Telemetry.sendTelemetry("documents_embedded_in_workspace", {
      LLMSelection: process.env.LLM_PROVIDER || "openai",
      VectorDbSelection: process.env.VECTOR_DB || "pinecone",
    });
    return { failed: failedToEmbed, embedded };
  },

  /**
   * Remove a set of documents from a workspace: delete their vectors from
   * the namespace, then delete the DB rows. Unknown paths are skipped.
   * @param {object} workspace - Workspace row; `id` and `slug` are read.
   * @param {string[]} removals - Storage paths of documents to remove.
   * @returns {Promise<boolean>} true when processing completes (including
   *   the empty no-op case, which previously returned undefined).
   */
  removeDocuments: async function (workspace, removals = []) {
    const VectorDb = getVectorDbClass();
    // An empty removal list is a successful no-op; return true for a
    // consistent truthy result (original returned undefined here).
    if (removals.length === 0) return true;

    for (const path of removals) {
      const document = await this.firstWhere({
        docpath: path,
        workspaceId: workspace.id,
      });
      if (!document) continue;

      await VectorDb.deleteDocumentFromNamespace(
        workspace.slug,
        document.docId
      );

      try {
        await prisma.workspace_documents.delete({
          where: { id: document.id, workspaceId: workspace.id },
        });
      } catch (error) {
        console.error(error.message);
      }
    }

    await Telemetry.sendTelemetry("documents_removed_in_workspace", {
      LLMSelection: process.env.LLM_PROVIDER || "openai",
      VectorDbSelection: process.env.VECTOR_DB || "pinecone",
    });
    return true;
  },
};
module.exports = { Document };