diff --git a/collector/package.json b/collector/package.json
index 4ce85e68e..bf6498c06 100644
--- a/collector/package.json
+++ b/collector/package.json
@@ -33,6 +33,7 @@
"mime": "^3.0.0",
"moment": "^2.29.4",
"node-html-parser": "^6.1.13",
+ "node-xlsx": "^0.24.0",
"officeparser": "^4.0.5",
"openai": "4.38.5",
"pdf-parse": "^1.1.1",
@@ -48,4 +49,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
-}
\ No newline at end of file
+}
diff --git a/collector/processSingleFile/convert/asXlsx.js b/collector/processSingleFile/convert/asXlsx.js
new file mode 100644
index 000000000..f21c6f1d9
--- /dev/null
+++ b/collector/processSingleFile/convert/asXlsx.js
@@ -0,0 +1,113 @@
+const { v4 } = require("uuid");
+const xlsx = require("node-xlsx").default;
+const path = require("path");
+const fs = require("fs");
+const {
+ createdDate,
+ trashFile,
+ writeToServerDocuments,
+} = require("../../utils/files");
+const { tokenizeString } = require("../../utils/tokenizer");
+const { default: slugify } = require("slugify");
+
+/**
+ * Serialize sheet rows (arrays of cell values) into CSV text, one row per line.
+ * Null/undefined cells become empty fields; fields containing a comma, double
+ * quote, or line break are quoted with inner quotes doubled (RFC 4180 style).
+ * @param {Array<Array<*>>} data - Rows of cell values from one worksheet.
+ * @returns {string} CSV text with rows joined by "\n".
+ */
+function convertToCSV(data) {
+  const toField = (cell) => {
+    const text = cell === null || cell === undefined ? "" : String(cell);
+    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
+  };
+  return data.map((row) => row.map(toField).join(",")).join("\n");
+}
+
+/**
+ * Convert an .xlsx workbook into one document per non-empty worksheet (its CSV
+ * text is passed to writeToServerDocuments). trashFile always runs on the upload.
+ * @param {Object} params
+ * @param {string} params.fullFilePath - Path of the uploaded workbook on disk.
+ * @param {string} params.filename - Original file name, used in titles and logs.
+ * @returns {Promise<{success: boolean, reason: (string|null), documents: Object[]}>}
+ */
+async function asXlsx({ fullFilePath = "", filename = "" }) {
+  const documents = [];
+  const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, {
+    lower: true,
+    trim: true,
+  });
+  const outFolderPath =
+    process.env.NODE_ENV === "development"
+      ? path.resolve(
+          __dirname,
+          `../../../server/storage/documents/${folderName}`
+        )
+      : path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`);
+
+  try {
+    const workSheetsFromFile = xlsx.parse(fullFilePath);
+    fs.mkdirSync(outFolderPath, { recursive: true }); // no-op if it exists
+    for (const sheet of workSheetsFromFile) {
+      // Bound outside the inner try: its const bindings are not visible in catch.
+      const name = sheet?.name;
+      try {
+        const content = convertToCSV(sheet.data);
+        if (!content?.length) {
+          console.warn(`Sheet "${name}" is empty. Skipping.`);
+          continue;
+        }
+        console.log(`-- Processing sheet: ${name} --`);
+        const sheetData = {
+          id: v4(),
+          url: `file://${path.join(outFolderPath, `${slugify(name)}.csv`)}`,
+          title: `${filename} - Sheet:${name}`,
+          docAuthor: "Unknown",
+          description: `Spreadsheet data from sheet: ${name}`,
+          docSource: "an xlsx file uploaded by the user.",
+          chunkSource: "",
+          published: createdDate(fullFilePath),
+          wordCount: content.split(/\s+/).length,
+          pageContent: content,
+          token_count_estimate: tokenizeString(content).length,
+        };
+        const document = writeToServerDocuments(
+          sheetData,
+          `sheet-${slugify(name)}`,
+          outFolderPath
+        );
+        documents.push(document);
+        console.log(
+          `[SUCCESS]: Sheet "${name}" converted & ready for embedding.`
+        );
+      } catch (err) {
+        // Best-effort: a failing sheet is logged and the remaining ones still run.
+        console.error(`Error processing sheet "${name}":`, err);
+      }
+    }
+  } catch (err) {
+    console.error("Could not process xlsx file!", err);
+    return {
+      success: false,
+      reason: `Error processing ${filename}: ${err.message}`,
+      documents: [],
+    };
+  } finally {
+    trashFile(fullFilePath);
+  }
+
+  if (documents.length === 0) {
+    const reason = `No valid sheets found in ${filename}.`;
+    console.error(reason);
+    return { success: false, reason, documents: [] };
+  }
+
+  console.log(
+    `[SUCCESS]: ${filename} fully processed. Created ${documents.length} document(s).\n`
+  );
+  return { success: true, reason: null, documents };
+}
+
+module.exports = asXlsx;
diff --git a/collector/utils/constants.js b/collector/utils/constants.js
index ee9ad22ae..c7beeb4b2 100644
--- a/collector/utils/constants.js
+++ b/collector/utils/constants.js
@@ -11,6 +11,10 @@ const ACCEPTED_MIMES = {
".pptx",
],
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": [
+ ".xlsx",
+ ],
+
"application/vnd.oasis.opendocument.text": [".odt"],
"application/vnd.oasis.opendocument.presentation": [".odp"],
@@ -41,6 +45,8 @@ const SUPPORTED_FILETYPE_CONVERTERS = {
".odt": "./convert/asOfficeMime.js",
".odp": "./convert/asOfficeMime.js",
+ ".xlsx": "./convert/asXlsx.js",
+
".mbox": "./convert/asMbox.js",
".epub": "./convert/asEPub.js",
diff --git a/collector/yarn.lock b/collector/yarn.lock
index 2786692e0..f991b43fa 100644
--- a/collector/yarn.lock
+++ b/collector/yarn.lock
@@ -2326,6 +2326,13 @@ node-html-parser@^6.1.13:
css-select "^5.1.0"
he "1.2.0"
+node-xlsx@^0.24.0:
+ version "0.24.0"
+ resolved "https://registry.yarnpkg.com/node-xlsx/-/node-xlsx-0.24.0.tgz#a6a365acb18ad37c66c2b254b6ebe0c22dc9dc6f"
+ integrity sha512-1olwK48XK9nXZsyH/FCltvGrQYvXXZuxVitxXXv2GIuRm51aBi1+5KwR4rWM4KeO61sFU+00913WLZTD+AcXEg==
+ dependencies:
+ xlsx "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz"
+
nodemailer@6.9.13:
version "6.9.13"
resolved "https://registry.yarnpkg.com/nodemailer/-/nodemailer-6.9.13.tgz#5b292bf1e92645f4852ca872c56a6ba6c4a3d3d6"
@@ -3528,6 +3535,10 @@ ws@8.14.2:
resolved "https://registry.yarnpkg.com/ws/-/ws-8.14.2.tgz#6c249a806eb2db7a20d26d51e7709eab7b2e6c7f"
integrity sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==
+"xlsx@https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz":
+ version "0.20.2"
+ resolved "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz#0f64eeed3f1a46e64724620c3553f2dbd3cd2d7d"
+
xml2js@^0.6.2:
version "0.6.2"
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.6.2.tgz#dd0b630083aa09c161e25a4d0901e2b2a929b499"
diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/ActionMenu/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/ActionMenu/index.jsx
index de7df30f8..0e228c9a3 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/ActionMenu/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/ActionMenu/index.jsx
@@ -53,17 +53,17 @@ function ActionMenu({ chatId, forkThread, isEditing, role }) {