const { fileData } = require("../utils/files");
const { v4: uuidv4 } = require("uuid");
const { getVectorDbClass } = require("../utils/helpers");
const prisma = require("../utils/prisma");
const { Telemetry } = require("./telemetry");

/**
 * Data-access helpers for rows of the `workspace_documents` table, plus the
 * embed/remove flows that keep those rows in step with the vector database
 * returned by `getVectorDbClass()`.
 */
const Document = {
  /**
   * List every document row that belongs to a workspace.
   *
   * @param {number|null} workspaceId - Workspace id to filter on.
   * @returns {Promise<object[]>} Matching rows; `[]` when `workspaceId` is falsy.
   */
  forWorkspace: async function (workspaceId = null) {
    if (!workspaceId) return [];
    return await prisma.workspace_documents.findMany({
      where: { workspaceId },
    });
  },

  /**
   * Delete every document row matching `clause`.
   *
   * @param {object} clause - Prisma `where` clause.
   * @returns {Promise<boolean>} `true` on success; `false` if the query threw
   *   (the error message is logged).
   */
  delete: async function (clause = {}) {
    try {
      await prisma.workspace_documents.deleteMany({ where: clause });
      return true;
    } catch (error) {
      console.error(error.message);
      return false;
    }
  },

  /**
   * Fetch the first document row matching `clause`.
   *
   * @param {object} clause - Prisma `where` clause.
   * @returns {Promise<object|null>} The row, or `null` when nothing matched or
   *   the query threw (the error message is logged).
   */
  firstWhere: async function (clause = {}) {
    try {
      const document = await prisma.workspace_documents.findFirst({
        where: clause,
      });
      return document || null;
    } catch (error) {
      console.error(error.message);
      return null;
    }
  },

  /**
   * Embed each file in `additions` into the workspace's vector namespace and
   * record a `workspace_documents` row for every file that was vectorized.
   *
   * Paths are processed one at a time. A path is skipped silently when
   * `fileData(path)` returns a falsy value.
   *
   * @param {object} workspace - Workspace record; `id` and `slug` are read.
   * @param {string[]} additions - Document paths to embed.
   * @returns {Promise<{failedToEmbed: string[], errors: Array, embedded: string[]}>}
   *   `failedToEmbed` holds the title (or filename) of each document the vector
   *   DB did not vectorize, `errors` the distinct `error` values it reported,
   *   and `embedded` the paths whose row was created. The empty-input result
   *   additionally carries a legacy `failed: []` key.
   */
  addDocuments: async function (workspace, additions = []) {
    const VectorDb = getVectorDbClass();
    if (additions.length === 0) {
      // Same shape as the normal return so callers can always destructure
      // `failedToEmbed` / `errors`. `failed` is retained only because this
      // path historically returned it.
      return { failed: [], failedToEmbed: [], errors: [], embedded: [] };
    }

    const embedded = [];
    const failedToEmbed = [];
    const errors = new Set(); // de-duplicates repeated error values

    for (const path of additions) {
      const data = await fileData(path);
      if (!data) continue;

      const docId = uuidv4();
      // Everything except the page content is stored as the row's metadata.
      const { pageContent, ...metadata } = data;
      const newDoc = {
        docId,
        // Second "/"-separated segment of the path is used as the filename.
        filename: path.split("/")[1],
        docpath: path,
        workspaceId: workspace.id,
        metadata: JSON.stringify(metadata),
      };

      const { vectorized, error } = await VectorDb.addDocumentToNamespace(
        workspace.slug,
        { ...data, docId },
        path
      );
      if (!vectorized) {
        console.error(
          "Failed to vectorize",
          metadata?.title || newDoc.filename
        );
        failedToEmbed.push(metadata?.title || newDoc.filename);
        errors.add(error);
        continue;
      }

      try {
        await prisma.workspace_documents.create({ data: newDoc });
        embedded.push(path);
      } catch (dbError) {
        // Best-effort: a failed insert is logged and the loop continues, so
        // the path is reported in neither `embedded` nor `failedToEmbed`.
        console.error(dbError.message);
      }
    }

    await Telemetry.sendTelemetry("documents_embedded_in_workspace", {
      LLMSelection: process.env.LLM_PROVIDER || "openai",
      Embedder: process.env.EMBEDDING_ENGINE || "inherit",
      VectorDbSelection: process.env.VECTOR_DB || "pinecone",
    });
    return { failedToEmbed, errors: Array.from(errors), embedded };
  },

  /**
   * Remove each document in `removals` from the workspace's vector namespace
   * and delete its `workspace_documents` row and `document_vectors` rows.
   *
   * Paths with no matching row for this workspace are skipped. Errors from the
   * two Prisma deletes are logged and do not stop the loop; the call to
   * `VectorDb.deleteDocumentFromNamespace` is not wrapped in the try block.
   *
   * @param {object} workspace - Workspace record; `id` and `slug` are read.
   * @param {string[]} removals - Document paths to remove.
   * @returns {Promise<true|undefined>} `undefined` when `removals` is empty,
   *   otherwise `true` once every path has been processed.
   */
  removeDocuments: async function (workspace, removals = []) {
    const VectorDb = getVectorDbClass();
    if (removals.length === 0) return;

    for (const path of removals) {
      const document = await this.firstWhere({
        docpath: path,
        workspaceId: workspace.id,
      });
      if (!document) continue;

      await VectorDb.deleteDocumentFromNamespace(
        workspace.slug,
        document.docId
      );

      try {
        await prisma.workspace_documents.delete({
          where: { id: document.id, workspaceId: workspace.id },
        });
        await prisma.document_vectors.deleteMany({
          where: { docId: document.docId },
        });
      } catch (error) {
        console.error(error.message);
      }
    }

    await Telemetry.sendTelemetry("documents_removed_in_workspace", {
      LLMSelection: process.env.LLM_PROVIDER || "openai",
      Embedder: process.env.EMBEDDING_ENGINE || "inherit",
      VectorDbSelection: process.env.VECTOR_DB || "pinecone",
    });
    return true;
  },

  /**
   * Count document rows matching `clause`.
   *
   * @param {object} clause - Prisma `where` clause.
   * @param {number|null} limit - When not `null`, forwarded to Prisma as `take`.
   * @returns {Promise<number>} The count, or `0` if the query threw (the error
   *   message is logged).
   */
  count: async function (clause = {}, limit = null) {
    try {
      const count = await prisma.workspace_documents.count({
        where: clause,
        ...(limit !== null ? { take: limit } : {}),
      });
      return count;
    } catch (error) {
      console.error("FAILED TO COUNT DOCUMENTS.", error.message);
      return 0;
    }
  },
};

module.exports = { Document };