From 32233974c2b70cbbd4aed0661c933b86a7c34d00 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Fri, 16 Feb 2024 12:35:53 -0800 Subject: [PATCH] Enable Alpaca JSON export format (#732) * Enable Alpaca JSON export format * Replace dom download link with filesave for browser compat Fix layout of exported json types for readability --- frontend/package.json | 3 +- .../src/pages/GeneralSettings/Chats/index.jsx | 72 ++++++++++++------- frontend/yarn.lock | 5 ++ server/utils/helpers/chat/convertTo.js | 25 ++++++- 4 files changed, 78 insertions(+), 27 deletions(-) diff --git a/frontend/package.json b/frontend/package.json index dbf99698..4eaa89dc 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -15,6 +15,7 @@ "@microsoft/fetch-event-source": "^2.0.1", "@phosphor-icons/react": "^2.0.13", "dompurify": "^3.0.8", + "file-saver": "^2.0.5", "he": "^1.2.0", "highlight.js": "^11.9.0", "lodash.debounce": "^4.0.8", @@ -58,4 +59,4 @@ "tailwindcss": "^3.3.1", "vite": "^4.3.0" } -} \ No newline at end of file +} diff --git a/frontend/src/pages/GeneralSettings/Chats/index.jsx b/frontend/src/pages/GeneralSettings/Chats/index.jsx index c44610f3..aeb341c1 100644 --- a/frontend/src/pages/GeneralSettings/Chats/index.jsx +++ b/frontend/src/pages/GeneralSettings/Chats/index.jsx @@ -8,33 +8,57 @@ import ChatRow from "./ChatRow"; import showToast from "@/utils/toast"; import System from "@/models/system"; import { CaretDown } from "@phosphor-icons/react"; +import { saveAs } from "file-saver"; + +const exportOptions = { + csv: { + name: "CSV", + mimeType: "text/csv", + fileExtension: "csv", + filenameFunc: () => { + return `anythingllm-chats-${new Date().toLocaleDateString()}`; + }, + }, + json: { + name: "JSON", + mimeType: "application/json", + fileExtension: "json", + filenameFunc: () => { + return `anythingllm-chats-${new Date().toLocaleDateString()}`; + }, + }, + jsonl: { + name: "JSONL", + mimeType: "application/jsonl", + fileExtension: "jsonl", + filenameFunc: () => { + return `anythingllm-chats-${new Date().toLocaleDateString()}-lines`; + }, + }, + jsonAlpaca: { + name: "JSON (Alpaca)", + mimeType: "application/json", + fileExtension: "json", + filenameFunc: () => { + return `anythingllm-chats-${new Date().toLocaleDateString()}-alpaca`; + }, + }, +}; + export default function WorkspaceChats() { const [showMenu, setShowMenu] = useState(false); const [exportType, setExportType] = useState("jsonl"); const menuRef = useRef(); const openMenuButton = useRef(); - const exportOptions = { - csv: { mimeType: "text/csv", fileExtension: "csv" }, - json: { mimeType: "application/json", fileExtension: "json" }, - jsonl: { mimeType: "application/jsonl", fileExtension: "jsonl" }, - }; const handleDumpChats = async () => { const chats = await System.exportChats(exportType); if (!!chats) { - const { mimeType, fileExtension } = exportOptions[exportType]; + const { name, mimeType, fileExtension, filenameFunc } = + exportOptions[exportType]; const blob = new Blob([chats], { type: mimeType }); - const link = document.createElement("a"); - link.href = window.URL.createObjectURL(blob); - link.download = `chats.${fileExtension}`; - document.body.appendChild(link); - link.click(); - window.URL.revokeObjectURL(link.href); - document.body.removeChild(link); - showToast( - `Chats exported successfully as ${fileExtension.toUpperCase()}.`, - "success" - ); + saveAs(blob, `${filenameFunc()}.${fileExtension}`); + showToast(`Chats exported successfully as ${name}.`, "success"); } else { showToast("Failed to export chats.", "error"); } @@ -79,7 +103,7 @@ export default function WorkspaceChats() { onClick={handleDumpChats} className="border border-slate-200 px-4 py-1 rounded-lg text-slate-200 text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800" > - Export Chats to {exportType.toUpperCase()} + Export as {exportOptions[exportType].name} ))} diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 0adc28fe..4cd24e72 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -1417,6 +1417,11 @@ file-entry-cache@^6.0.1: dependencies: flat-cache "^3.0.4" +file-saver@^2.0.5: + version "2.0.5" + resolved "https://registry.yarnpkg.com/file-saver/-/file-saver-2.0.5.tgz#d61cfe2ce059f414d899e9dd6d4107ee25670c38" + integrity sha512-P9bmyZ3h/PRG+Nzga+rbdI4OEpNDzAVyy74uVO9ATgzLK6VtAsYybF/+TOCvrc0MO793d6+42lLyZTw7/ArVzA== + file-selector@^0.6.0: version "0.6.0" resolved "https://registry.yarnpkg.com/file-selector/-/file-selector-0.6.0.tgz#fa0a8d9007b829504db4d07dd4de0310b65287dc" diff --git a/server/utils/helpers/chat/convertTo.js b/server/utils/helpers/chat/convertTo.js index 2109ecbe..119c155a 100644 --- a/server/utils/helpers/chat/convertTo.js +++ b/server/utils/helpers/chat/convertTo.js @@ -4,7 +4,6 @@ const { Workspace } = require("../../../models/workspace"); const { WorkspaceChats } = require("../../../models/workspaceChats"); -// Todo: add RLHF feedbackScore field support async function convertToCSV(preparedData) { const rows = ["id,username,workspace,prompt,response,sent_at,rating"]; for (const item of preparedData) { @@ -27,7 +26,12 @@ async function convertToJSON(workspaceChatsMap) { [], Object.values(workspaceChatsMap).map((workspace) => workspace.messages) ); - return JSON.stringify(allMessages); + return JSON.stringify(allMessages, null, 4); +} + +// ref: https://raw.githubusercontent.com/gururise/AlpacaDataCleaned/main/alpaca_data.json +async function convertToJSONAlpaca(preparedData) { + return JSON.stringify(preparedData, null, 4); } async function convertToJSONL(workspaceChatsMap) { @@ -66,6 +70,19 @@ async function prepareWorkspaceChatsForExport(format = "jsonl") { return preparedData; } + if (format === "jsonAlpaca") { + const preparedData = chats.map((chat) => { + const responseJson = JSON.parse(chat.response); + return { + instruction: chat.prompt, + input: "", + output: responseJson.text, + }; + }); + + return preparedData; + } + const workspaceIds = [...new Set(chats.map((chat) => chat.workspaceId))]; const workspacesWithPrompts = await Promise.all( @@ -124,6 +141,10 @@ const exportMap = { contentType: "application/jsonl", func: convertToJSONL, }, + jsonAlpaca: { + contentType: "application/json", + func: convertToJSONAlpaca, + }, }; function escapeCsv(str) {