normalize paths for submit URLs of `remove-documents` (#598)

normalize paths for submit URLs
This commit is contained in:
Timothy Carambat 2024-01-14 16:36:17 -08:00 committed by GitHub
parent e150e99e45
commit 026849df02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 40 deletions

View File

@ -1,7 +1,7 @@
process.env.NODE_ENV === "development"
? require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` })
: require("dotenv").config();
const { viewLocalFiles } = require("../utils/files");
const { viewLocalFiles, normalizePath } = require("../utils/files");
const { exportData, unpackAndOverwriteImport } = require("../utils/files/data");
const {
checkProcessorAlive,
@ -401,9 +401,7 @@ function systemEndpoints(app) {
app.get("/system/data-exports/:filename", (request, response) => {
const exportLocation = __dirname + "/../storage/exports/";
const sanitized = path
.normalize(request.params.filename)
.replace(/^(\.\.(\/|\\|$))+/, "");
const sanitized = normalizePath(request.params.filename);
const finalDestination = path.join(exportLocation, sanitized);
if (!fs.existsSync(finalDestination)) {

View File

@ -2,32 +2,6 @@ const fs = require("fs");
const path = require("path");
const { v5: uuidv5 } = require("uuid");
/**
 * Load and parse every `.json` file found directly inside one subfolder of
 * the documents storage directory.
 *
 * The storage root is `../../storage/documents` relative to this module when
 * NODE_ENV is "development", otherwise `$STORAGE_DIR/documents`.
 *
 * NOTE(review): `folderName` is interpolated into the path as-is; callers are
 * responsible for passing a trusted value.
 *
 * @param {string|null} folderName - Name of the subfolder to read.
 * @returns {Promise<Array<*>>} Parsed contents of each `.json` file.
 * @throws {Error} If `folderName` is falsy or the folder does not exist.
 * @throws {SyntaxError} If a `.json` file does not contain valid JSON.
 */
async function collectDocumentData(folderName = null) {
  if (!folderName) {
    throw new Error("No docPath provided in request");
  }

  let documentsDir;
  if (process.env.NODE_ENV === "development") {
    documentsDir = path.resolve(
      __dirname,
      `../../storage/documents/${folderName}`
    );
  } else {
    documentsDir = path.resolve(
      process.env.STORAGE_DIR,
      `documents/${folderName}`
    );
  }

  if (!fs.existsSync(documentsDir)) {
    throw new Error(
      `No documents folder for ${folderName} - did you run collector/main.py for this element?`
    );
  }

  const parsedDocuments = [];
  for (const entry of fs.readdirSync(documentsDir)) {
    // Only JSON documents are collected; everything else is ignored.
    if (path.extname(entry) !== ".json") continue;

    const contents = fs.readFileSync(path.join(documentsDir, entry), "utf8");
    console.log(`Parsing document: ${entry}`);
    parsedDocuments.push(JSON.parse(contents));
  }
  return parsedDocuments;
}
// Should take in a folder that is a subfolder of documents
// eg: youtube-subject/video-123.json
async function fileData(filePath = null) {
@ -35,8 +9,15 @@ async function fileData(filePath = null) {
const fullPath =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/documents/${filePath}`)
: path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`);
? path.resolve(
__dirname,
`../../storage/documents/${normalizePath(filePath)}`
)
: path.resolve(
process.env.STORAGE_DIR,
`documents/${normalizePath(filePath)}`
);
const fileExists = fs.existsSync(fullPath);
if (!fileExists) return null;
@ -142,11 +123,18 @@ async function storeVectorResult(vectorData = [], filename = null) {
async function purgeSourceDocument(filename = null) {
if (!filename) return;
console.log(`Purging source document of ${filename}.`);
const filePath =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/documents`, filename)
: path.resolve(process.env.STORAGE_DIR, `documents`, filename);
? path.resolve(
__dirname,
`../../storage/documents`,
normalizePath(filename)
)
: path.resolve(
process.env.STORAGE_DIR,
`documents`,
normalizePath(filename)
);
if (!fs.existsSync(filePath)) return;
fs.rmSync(filePath);
@ -169,12 +157,16 @@ async function purgeVectorCache(filename = null) {
return;
}
/**
 * Sanitize a caller-supplied path so it can be safely appended to a storage
 * base directory.
 *
 * The path is normalized, then stripped of its filesystem root (`/`, `C:\`,
 * UNC prefix) and of any leading run of `..` segments or path separators.
 * Removing the root matters because `path.resolve(base, p)` discards `base`
 * entirely when `p` is absolute, which would let a request escape the
 * storage directory.
 *
 * @param {string} filepath - Untrusted relative path (e.g. from a request).
 * @returns {string} A relative path with no leading root, separator, or `..`
 *   segment. An empty input yields ".".
 */
function normalizePath(filepath = "") {
  const normalized = path.normalize(filepath);
  const { root } = path.parse(normalized);
  return (
    normalized
      .slice(root.length)
      // A single pass over the whole leading run also covers mixed forms such
      // as "..\/etc", where stripping only "..\" would leave an absolute path.
      .replace(/^(?:\.\.(?:[\\/]|$)|[\\/])+/, "")
  );
}
// Helpers exposed to other modules via require().
module.exports = {
cachedVectorInformation,
collectDocumentData,
viewLocalFiles,
purgeSourceDocument,
purgeVectorCache,
storeVectorResult,
fileData,
// Exported so callers outside this module can sanitize paths the same way.
normalizePath,
};

View File

@ -1,7 +1,6 @@
const fs = require("fs");
const path = require("path");
const { purgeVectorCache, purgeSourceDocument } = require(".");
const { purgeVectorCache, purgeSourceDocument, normalizePath } = require(".");
const { Document } = require("../../models/documents");
const { Workspace } = require("../../models/workspace");
@ -22,10 +21,10 @@ async function purgeFolder(folderName) {
? path.resolve(__dirname, `../../storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`);
const folderPath = path.resolve(documentsFolder, folderName);
const folderPath = path.resolve(documentsFolder, normalizePath(folderName));
const filenames = fs
.readdirSync(folderPath)
.map((file) => path.join(folderName, file));
.map((file) => path.join(folderPath, file));
const workspaces = await Workspace.where();
const purgePromises = [];