2023-06-08 06:31:35 +02:00
|
|
|
const { fileData } = require("../utils/files");
|
|
|
|
const { v4: uuidv4 } = require("uuid");
|
|
|
|
const { getVectorDbClass } = require("../utils/helpers");
|
2023-09-28 23:00:03 +02:00
|
|
|
const prisma = require("../utils/prisma");
|
2023-08-15 02:42:17 +02:00
|
|
|
const { Telemetry } = require("./telemetry");
|
2023-06-04 04:28:07 +02:00
|
|
|
|
|
|
|
/**
 * Helpers for reading and writing `workspace_documents` records through
 * prisma, and for keeping the configured vector database in step with them
 * when documents are added to or removed from a workspace.
 */
const Document = {
  /**
   * Fetch every document record attached to a workspace.
   *
   * @param {*} [workspaceId=null] - Value matched against `workspaceId`.
   * @returns {Promise<object[]>} Matching records; an empty array when
   *   `workspaceId` is falsy (no query is issued in that case).
   */
  forWorkspace: async function (workspaceId = null) {
    if (!workspaceId) return [];
    return await prisma.workspace_documents.findMany({
      where: { workspaceId },
    });
  },

  /**
   * Delete every document record matching `clause`.
   *
   * @param {object} [clause={}] - Prisma `where` filter.
   * @returns {Promise<boolean>} `true` when the delete call succeeded,
   *   `false` when it threw (the error message is logged).
   */
  delete: async function (clause = {}) {
    try {
      await prisma.workspace_documents.deleteMany({ where: clause });
      return true;
    } catch (error) {
      console.error(error.message);
      return false;
    }
  },

  /**
   * Find the first document record matching `clause`.
   *
   * @param {object} [clause={}] - Prisma `where` filter.
   * @returns {Promise<object|null>} The record, or `null` when nothing
   *   matches or the query throws (the error message is logged).
   */
  firstWhere: async function (clause = {}) {
    try {
      const document = await prisma.workspace_documents.findFirst({
        where: clause,
      });
      return document || null;
    } catch (error) {
      console.error(error.message);
      return null;
    }
  },

  /**
   * Embed the given document files into the workspace's vector namespace and
   * create a `workspace_documents` record for each one that was vectorized.
   *
   * Paths are processed one at a time. A path whose file data cannot be
   * loaded is skipped without being reported.
   *
   * @param {{id: *, slug: string}} workspace - Workspace receiving the
   *   documents; `slug` is passed to the vector database as the namespace.
   * @param {string[]} [additions=[]] - Document paths to embed.
   * @returns {Promise<{failedToEmbed: string[], errors: Array, embedded: string[]}>}
   *   `embedded` lists the paths that were stored, `failedToEmbed` lists the
   *   title (or filename) of each document the vector database rejected, and
   *   `errors` holds the de-duplicated error values it reported. When there
   *   is nothing to add the same keys are returned empty, together with the
   *   legacy `failed: []` key.
   */
  addDocuments: async function (workspace, additions = []) {
    // Bail out before touching the vector database. The shape matches the
    // normal return value so callers can always read `failedToEmbed` and
    // `errors`; `failed` is kept only for callers of the older shape.
    if (!additions?.length) {
      return { failedToEmbed: [], failed: [], errors: [], embedded: [] };
    }

    const VectorDb = getVectorDbClass();
    const embedded = [];
    const failedToEmbed = [];
    const errors = new Set(); // Set: identical errors are reported once.

    for (const path of additions) {
      const data = await fileData(path);
      if (!data) continue;

      const docId = uuidv4();
      // `pageContent` is split off only so it is left out of the stored
      // metadata; the full `data` is still what gets vectorized below.
      const { pageContent, ...metadata } = data;
      const newDoc = {
        docId,
        // Second "/"-separated segment of the path.
        // NOTE(review): assumes paths look like `<folder>/<file>` - confirm.
        filename: path.split("/")[1],
        docpath: path,
        workspaceId: workspace.id,
        metadata: JSON.stringify(metadata),
      };

      const { vectorized, error } = await VectorDb.addDocumentToNamespace(
        workspace.slug,
        { ...data, docId },
        path
      );

      if (!vectorized) {
        const label = metadata?.title || newDoc.filename;
        console.error("Failed to vectorize", label);
        failedToEmbed.push(label);
        // Skip empty error values so callers never receive `undefined`.
        if (error) errors.add(error);
        continue;
      }

      try {
        await prisma.workspace_documents.create({ data: newDoc });
        embedded.push(path);
      } catch (error) {
        // NOTE(review): the document was already vectorized at this point but
        // ends up in neither `embedded` nor `failedToEmbed`.
        console.error(error.message);
      }
    }

    await Telemetry.sendTelemetry("documents_embedded_in_workspace", {
      LLMSelection: process.env.LLM_PROVIDER || "openai",
      Embedder: process.env.EMBEDDING_ENGINE || "inherit",
      VectorDbSelection: process.env.VECTOR_DB || "pinecone",
    });
    return { failedToEmbed, errors: Array.from(errors), embedded };
  },

  /**
   * Remove the given documents from the workspace: delete them from the
   * vector namespace, then delete their `workspace_documents` record and any
   * `document_vectors` rows sharing the same `docId`.
   *
   * Paths with no matching record in this workspace are skipped. Database
   * errors are logged and do not stop the remaining removals.
   *
   * @param {{id: *, slug: string}} workspace - Workspace owning the documents.
   * @param {string[]} [removals=[]] - Document paths to remove.
   * @returns {Promise<boolean>} Always `true`.
   */
  removeDocuments: async function (workspace, removals = []) {
    // Nothing to do: skip the vector database lookup and telemetry, and
    // report the same value as the normal path.
    if (!removals?.length) return true;

    const VectorDb = getVectorDbClass();

    for (const path of removals) {
      const document = await this.firstWhere({
        docpath: path,
        workspaceId: workspace.id,
      });
      if (!document) continue;

      await VectorDb.deleteDocumentFromNamespace(
        workspace.slug,
        document.docId
      );

      try {
        await prisma.workspace_documents.delete({
          where: { id: document.id, workspaceId: workspace.id },
        });
        await prisma.document_vectors.deleteMany({
          where: { docId: document.docId },
        });
      } catch (error) {
        console.error(error.message);
      }
    }

    await Telemetry.sendTelemetry("documents_removed_in_workspace", {
      LLMSelection: process.env.LLM_PROVIDER || "openai",
      Embedder: process.env.EMBEDDING_ENGINE || "inherit",
      VectorDbSelection: process.env.VECTOR_DB || "pinecone",
    });
    return true;
  },

  /**
   * Count the document records matching `clause`.
   *
   * @param {object} [clause={}] - Prisma `where` filter.
   * @param {number|null} [limit=null] - When not `null`, forwarded to prisma
   *   as `take`.
   * @returns {Promise<number>} The count, or `0` when the query throws (the
   *   error message is logged).
   */
  count: async function (clause = {}, limit = null) {
    try {
      const count = await prisma.workspace_documents.count({
        where: clause,
        ...(limit !== null ? { take: limit } : {}),
      });
      return count;
    } catch (error) {
      console.error("FAILED TO COUNT DOCUMENTS.", error.message);
      return 0;
    }
  },
};
|
2023-06-04 04:28:07 +02:00
|
|
|
|
2023-06-08 06:31:35 +02:00
|
|
|
// Export the Document helper object as a named CommonJS export.
module.exports = { Document };
|