From 60a00843dfc2ceb012cb4c8c62257270ae28e893 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Mon, 26 Jun 2023 17:20:09 -0700 Subject: [PATCH] add ability to purge document from custom documents as well as cleanup its associated cache file (#113) * add ability to purge document from custom documents as well as cleanup its associated cache file * update alert text --- .../Documents/Directory/index.jsx | 41 ++++++++++++++----- frontend/src/models/system.js | 12 ++++++ server/endpoints/system.js | 12 ++++++ server/utils/files/index.js | 41 +++++++++++++++++++ server/utils/files/purgeDocument.js | 17 ++++++++ .../utils/vectorDbProviders/chroma/index.js | 8 ++-- .../utils/vectorDbProviders/pinecone/index.js | 8 ++-- 7 files changed, 121 insertions(+), 18 deletions(-) create mode 100644 server/utils/files/purgeDocument.js diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx index fe5e38a3..b838d0b1 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx @@ -8,6 +8,7 @@ import { Zap, } from "react-feather"; import { nFormatter } from "../../../../../utils/numbers"; +import System from "../../../../../models/system"; export default function Directory({ files, @@ -19,6 +20,16 @@ export default function Directory({ const [isExpanded, toggleExpanded] = useState(false); const [showDetails, toggleDetails] = useState(false); const [showZap, setShowZap] = useState(false); + const handleDelete = async (name, meta) => { + if ( + !window.confirm( + "Are you sure you want to delete this document?\nThis will require you to re-upload and re-embed it.\nThis document will be removed from any workspace that is currently referencing it.\nThis action is not reversible." 
+ ) + ) + return false; + document?.getElementById(meta?.id)?.remove(); + await System.deleteDocument(name, meta); + }; if (files.type === "folder") { return ( @@ -73,7 +84,7 @@ export default function Directory({ const { name, type: _type, ...meta } = files; return ( -
+
{meta?.cached && (
{showDetails && ( -
- {Object.entries(meta).map(([key, value], i) => { - if (key === "cached") return null; - return ( -

- {key}: {value} -

- ); - })} +
+
+ {Object.entries(meta).map(([key, value], i) => { + if (key === "cached") return null; + return ( +

+ {key}: {value} +

+ ); + })} +
+
handleDelete(`${parent}/${name}`, meta)} + className="flex items-center justify-end w-full" + > + +
)}
diff --git a/frontend/src/models/system.js b/frontend/src/models/system.js index 1ce003d5..99e46269 100644 --- a/frontend/src/models/system.js +++ b/frontend/src/models/system.js @@ -86,6 +86,18 @@ const System = { return { newValues: null, error: e.message }; }); }, + deleteDocument: async (name, meta) => { + return await fetch(`${API_BASE}/system/remove-document`, { + method: "DELETE", + headers: baseHeaders(), + body: JSON.stringify({ name, meta }), + }) + .then((res) => res.ok) + .catch((e) => { + console.error(e); + return false; + }); + }, }; export default System; diff --git a/server/endpoints/system.js b/server/endpoints/system.js index ba16b2e5..5abb5c71 100644 --- a/server/endpoints/system.js +++ b/server/endpoints/system.js @@ -7,6 +7,7 @@ const { checkPythonAppAlive, acceptedFileTypes, } = require("../utils/files/documentProcessor"); +const { purgeDocument } = require("../utils/files/purgeDocument"); const { getVectorDbClass } = require("../utils/helpers"); const { updateENV } = require("../utils/helpers/updateENV"); const { reqBody, makeJWT } = require("../utils/http"); @@ -94,6 +95,17 @@ function systemEndpoints(app) { } }); + app.delete("/system/remove-document", async (request, response) => { + try { + const { name, meta } = reqBody(request); + await purgeDocument(name, meta); + response.sendStatus(200).end(); + } catch (e) { + console.log(e.message, e); + response.sendStatus(500).end(); + } + }); + app.get("/system/local-files", async (_, response) => { try { const localFiles = await viewLocalFiles(); diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 0e49b540..83505f8b 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -141,10 +141,51 @@ async function storeVectorResult(vectorData = [], filename = null) { return; } +// Purges a file from the documents/ folder. 
+async function purgeSourceDocument(filename = null) { + if (!filename) return; + console.log(`Purging document of ${filename}.`); + + const filePath = + process.env.NODE_ENV === "development" + ? path.resolve(__dirname, `../../storage/documents`, filename) + : path.resolve(process.env.STORAGE_DIR, `documents`, filename); + + if (!fs.existsSync(filePath)) { + console.log(`Could not located cachefile for ${filename}`, filePath); + return; + } + + fs.rmSync(filePath); + return; +} + +// Purges a vector-cache file from the vector-cache/ folder. +async function purgeVectorCache(filename = null) { + if (!filename) return; + console.log(`Purging cached vectorized results of ${filename}.`); + + const digest = uuidv5(filename, uuidv5.URL); + const filePath = + process.env.NODE_ENV === "development" + ? path.resolve(__dirname, `../../storage/vector-cache`, `${digest}.json`) + : path.resolve(process.env.STORAGE_DIR, `vector-cache`, `${digest}.json`); + + if (!fs.existsSync(filePath)) { + console.log(`Could not located cache file for ${filename}`, filePath); + return; + } + + fs.rmSync(filePath); + return; +} + module.exports = { cachedVectorInformation, collectDocumentData, viewLocalFiles, + purgeSourceDocument, + purgeVectorCache, storeVectorResult, fileData, }; diff --git a/server/utils/files/purgeDocument.js b/server/utils/files/purgeDocument.js new file mode 100644 index 00000000..a584a426 --- /dev/null +++ b/server/utils/files/purgeDocument.js @@ -0,0 +1,17 @@ +const { purgeVectorCache, purgeSourceDocument } = require("."); +const { Document } = require("../../models/documents"); +const { Workspace } = require("../../models/workspace"); + +async function purgeDocument(filename, meta) { + const workspaces = await Workspace.where(); + for (const workspace of workspaces) { + await Document.removeDocuments(workspace, [filename]); + } + await purgeVectorCache(filename); + await purgeSourceDocument(filename); + return; +} + +module.exports = { + purgeDocument, +}; diff --git 
a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 532f629c..1fb324aa 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -350,10 +350,10 @@ const Chroma = { content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. Context: ${contextTexts - .map((text, i) => { - return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; - }) - .join("")}`, + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, }; const memory = [prompt, ...chatHistory, { role: "user", content: input }]; const responseText = await this.getChatCompletion(this.openai(), memory, { diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index 37198ca4..dc984f7f 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -321,10 +321,10 @@ const Pinecone = { content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. Context: ${contextTexts - .map((text, i) => { - return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; - }) - .join("")}`, + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, }; const memory = [prompt, ...chatHistory, { role: "user", content: input }];