From 026849df0224b6a8754f4103530bc015874def62 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Sun, 14 Jan 2024 16:36:17 -0800 Subject: [PATCH] normalize paths for submit URLs of `remove-documents` (#598) normalize paths for submit URLs --- server/endpoints/system.js | 6 ++-- server/utils/files/index.js | 56 +++++++++++++---------------- server/utils/files/purgeDocument.js | 7 ++-- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/server/endpoints/system.js b/server/endpoints/system.js index 345bd230..6d985065 100644 --- a/server/endpoints/system.js +++ b/server/endpoints/system.js @@ -1,7 +1,7 @@ process.env.NODE_ENV === "development" ? require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` }) : require("dotenv").config(); -const { viewLocalFiles } = require("../utils/files"); +const { viewLocalFiles, normalizePath } = require("../utils/files"); const { exportData, unpackAndOverwriteImport } = require("../utils/files/data"); const { checkProcessorAlive, @@ -401,9 +401,7 @@ function systemEndpoints(app) { app.get("/system/data-exports/:filename", (request, response) => { const exportLocation = __dirname + "/../storage/exports/"; - const sanitized = path - .normalize(request.params.filename) - .replace(/^(\.\.(\/|\\|$))+/, ""); + const sanitized = normalizePath(request.params.filename); const finalDestination = path.join(exportLocation, sanitized); if (!fs.existsSync(finalDestination)) { diff --git a/server/utils/files/index.js b/server/utils/files/index.js index b6c7a307..2ff1d60c 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -2,32 +2,6 @@ const fs = require("fs"); const path = require("path"); const { v5: uuidv5 } = require("uuid"); -async function collectDocumentData(folderName = null) { - if (!folderName) throw new Error("No docPath provided in request"); - const folder = - process.env.NODE_ENV === "development" - ?
path.resolve(__dirname, `../../storage/documents/${folderName}`) - : path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`); - - const dirExists = fs.existsSync(folder); - if (!dirExists) - throw new Error( - `No documents folder for ${folderName} - did you run collector/main.py for this element?` - ); - - const files = fs.readdirSync(folder); - const fileData = []; - files.forEach((file) => { - if (path.extname(file) === ".json") { - const filePath = path.join(folder, file); - const data = fs.readFileSync(filePath, "utf8"); - console.log(`Parsing document: ${file}`); - fileData.push(JSON.parse(data)); - } - }); - return fileData; -} - // Should take in a folder that is a subfolder of documents // eg: youtube-subject/video-123.json async function fileData(filePath = null) { @@ -35,8 +9,15 @@ async function fileData(filePath = null) { const fullPath = process.env.NODE_ENV === "development" - ? path.resolve(__dirname, `../../storage/documents/${filePath}`) - : path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`); + ? path.resolve( + __dirname, + `../../storage/documents/${normalizePath(filePath)}` + ) + : path.resolve( + process.env.STORAGE_DIR, + `documents/${normalizePath(filePath)}` + ); + const fileExists = fs.existsSync(fullPath); if (!fileExists) return null; @@ -142,11 +123,18 @@ async function storeVectorResult(vectorData = [], filename = null) { async function purgeSourceDocument(filename = null) { if (!filename) return; console.log(`Purging source document of ${filename}.`); - const filePath = process.env.NODE_ENV === "development" - ? path.resolve(__dirname, `../../storage/documents`, filename) - : path.resolve(process.env.STORAGE_DIR, `documents`, filename); + ? 
path.resolve( + __dirname, + `../../storage/documents`, + normalizePath(filename) + ) + : path.resolve( + process.env.STORAGE_DIR, + `documents`, + normalizePath(filename) + ); if (!fs.existsSync(filePath)) return; fs.rmSync(filePath); @@ -169,12 +157,16 @@ async function purgeVectorCache(filename = null) { return; } +function normalizePath(filepath = "") { + return path.normalize(filepath).replace(/^(\.\.(\/|\\|$))+/, ""); +} + module.exports = { cachedVectorInformation, - collectDocumentData, viewLocalFiles, purgeSourceDocument, purgeVectorCache, storeVectorResult, fileData, + normalizePath, }; diff --git a/server/utils/files/purgeDocument.js b/server/utils/files/purgeDocument.js index 27fe1471..46e9d37d 100644 --- a/server/utils/files/purgeDocument.js +++ b/server/utils/files/purgeDocument.js @@ -1,7 +1,6 @@ const fs = require("fs"); const path = require("path"); - -const { purgeVectorCache, purgeSourceDocument } = require("."); +const { purgeVectorCache, purgeSourceDocument, normalizePath } = require("."); const { Document } = require("../../models/documents"); const { Workspace } = require("../../models/workspace"); @@ -22,10 +21,10 @@ async function purgeFolder(folderName) { ? path.resolve(__dirname, `../../storage/documents`) : path.resolve(process.env.STORAGE_DIR, `documents`); - const folderPath = path.resolve(documentsFolder, folderName); + const folderPath = path.resolve(documentsFolder, normalizePath(folderName)); const filenames = fs .readdirSync(folderPath) - .map((file) => path.join(folderName, file)); + .map((file) => path.join(folderPath, file)); const workspaces = await Workspace.where(); const purgePromises = [];