From fa4ab0f65f290f79ee968210cb88ae145c2845d1 Mon Sep 17 00:00:00 2001 From: Jason Zhang Date: Wed, 26 Jun 2024 08:15:09 +0930 Subject: [PATCH] fix: sanitize filename before writing (#1743) * fix: sanitize filename before writing Fixes: https://github.com/Mintplex-Labs/anything-llm/issues/1737 * fixup * fixup --- collector/utils/extensions/Confluence/index.js | 10 +++++----- collector/utils/files/index.js | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 6df06310..22df1c7f 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -3,7 +3,7 @@ const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); const UrlPattern = require("url-pattern"); -const { writeToServerDocuments } = require("../../files"); +const { writeToServerDocuments, sanitizeFileName } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); const { ConfluencePagesLoader, @@ -98,11 +98,11 @@ async function loadConfluence({ pageUrl, username, accessToken }, response) { console.log( `[Confluence Loader]: Saving ${doc.metadata.title} to ${outFolder}` ); - writeToServerDocuments( - data, - `${slugify(doc.metadata.title)}-${data.id}`, - outFolderPath + + const fileName = sanitizeFileName( + `${slugify(doc.metadata.title)}-${data.id}` ); + writeToServerDocuments(data, fileName, outFolderPath); }); return { diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js index 9b56bb5b..86b50c36 100644 --- a/collector/utils/files/index.js +++ b/collector/utils/files/index.js @@ -129,6 +129,11 @@ function normalizePath(filepath = "") { return result; } +function sanitizeFileName(fileName) { + if (!fileName) return fileName; + return fileName.replace(/[<>:"\/\\|?*]/g, ""); +} + module.exports = { trashFile, isTextType, @@ -137,4 +142,5 @@ module.exports = { wipeCollectorStorage, normalizePath, isWithin, + sanitizeFileName, };