Enable Alpaca JSON export format (#732)

* Enable Alpaca JSON export format

* Replace DOM download link with file-saver for browser compatibility
Fix layout of exported JSON types for readability
This commit is contained in:
Timothy Carambat 2024-02-16 12:35:53 -08:00 committed by GitHub
parent ce8f65ff84
commit 32233974c2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 78 additions and 27 deletions

View File

@ -15,6 +15,7 @@
"@microsoft/fetch-event-source": "^2.0.1", "@microsoft/fetch-event-source": "^2.0.1",
"@phosphor-icons/react": "^2.0.13", "@phosphor-icons/react": "^2.0.13",
"dompurify": "^3.0.8", "dompurify": "^3.0.8",
"file-saver": "^2.0.5",
"he": "^1.2.0", "he": "^1.2.0",
"highlight.js": "^11.9.0", "highlight.js": "^11.9.0",
"lodash.debounce": "^4.0.8", "lodash.debounce": "^4.0.8",

View File

@ -8,33 +8,57 @@ import ChatRow from "./ChatRow";
import showToast from "@/utils/toast"; import showToast from "@/utils/toast";
import System from "@/models/system"; import System from "@/models/system";
import { CaretDown } from "@phosphor-icons/react"; import { CaretDown } from "@phosphor-icons/react";
import { saveAs } from "file-saver";
/**
 * Builds a filename-safe date stamp in the form YYYY-MM-DD using local time.
 *
 * `Date.prototype.toLocaleDateString()` is deliberately avoided here: its
 * output depends on the user's locale and commonly contains "/" (for example
 * "2/16/2024"), which is not a valid filename character and does not sort
 * chronologically.
 *
 * @param {Date} [date] - Date to format; defaults to the current time.
 * @returns {string} Zero-padded date stamp such as "2024-02-16".
 */
function exportDateStamp(date = new Date()) {
  const year = date.getFullYear();
  // getMonth() is zero-based, so shift it to the calendar month.
  const month = String(date.getMonth() + 1).padStart(2, "0");
  const day = String(date.getDate()).padStart(2, "0");
  return `${year}-${month}-${day}`;
}

/**
 * Supported chat export formats, keyed by the export type identifier.
 *
 * Each entry provides:
 *  - name: human-readable label for the format
 *  - mimeType: MIME type used for the downloaded Blob
 *  - fileExtension: extension appended to the generated filename
 *  - filenameFunc: returns the base filename (without extension), stamped
 *    with the current local date
 */
const exportOptions = {
  csv: {
    name: "CSV",
    mimeType: "text/csv",
    fileExtension: "csv",
    filenameFunc: () => `anythingllm-chats-${exportDateStamp()}`,
  },
  json: {
    name: "JSON",
    mimeType: "application/json",
    fileExtension: "json",
    filenameFunc: () => `anythingllm-chats-${exportDateStamp()}`,
  },
  jsonl: {
    name: "JSONL",
    mimeType: "application/jsonl",
    fileExtension: "jsonl",
    // Suffix distinguishes the line-delimited export from the plain JSON one.
    filenameFunc: () => `anythingllm-chats-${exportDateStamp()}-lines`,
  },
  jsonAlpaca: {
    name: "JSON (Alpaca)",
    mimeType: "application/json",
    fileExtension: "json",
    // Shares the .json extension with `json`, so the suffix keeps the names apart.
    filenameFunc: () => `anythingllm-chats-${exportDateStamp()}-alpaca`,
  },
};
export default function WorkspaceChats() { export default function WorkspaceChats() {
const [showMenu, setShowMenu] = useState(false); const [showMenu, setShowMenu] = useState(false);
const [exportType, setExportType] = useState("jsonl"); const [exportType, setExportType] = useState("jsonl");
const menuRef = useRef(); const menuRef = useRef();
const openMenuButton = useRef(); const openMenuButton = useRef();
const exportOptions = {
csv: { mimeType: "text/csv", fileExtension: "csv" },
json: { mimeType: "application/json", fileExtension: "json" },
jsonl: { mimeType: "application/jsonl", fileExtension: "jsonl" },
};
const handleDumpChats = async () => { const handleDumpChats = async () => {
const chats = await System.exportChats(exportType); const chats = await System.exportChats(exportType);
if (!!chats) { if (!!chats) {
const { mimeType, fileExtension } = exportOptions[exportType]; const { name, mimeType, fileExtension, filenameFunc } =
exportOptions[exportType];
const blob = new Blob([chats], { type: mimeType }); const blob = new Blob([chats], { type: mimeType });
const link = document.createElement("a"); saveAs(blob, `${filenameFunc()}.${fileExtension}`);
link.href = window.URL.createObjectURL(blob); showToast(`Chats exported successfully as ${name}.`, "success");
link.download = `chats.${fileExtension}`;
document.body.appendChild(link);
link.click();
window.URL.revokeObjectURL(link.href);
document.body.removeChild(link);
showToast(
`Chats exported successfully as ${fileExtension.toUpperCase()}.`,
"success"
);
} else { } else {
showToast("Failed to export chats.", "error"); showToast("Failed to export chats.", "error");
} }
@ -79,7 +103,7 @@ export default function WorkspaceChats() {
onClick={handleDumpChats} onClick={handleDumpChats}
className="border border-slate-200 px-4 py-1 rounded-lg text-slate-200 text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800" className="border border-slate-200 px-4 py-1 rounded-lg text-slate-200 text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800"
> >
Export Chats to {exportType.toUpperCase()} Export as {exportOptions[exportType].name}
</button> </button>
<button <button
ref={openMenuButton} ref={openMenuButton}
@ -97,18 +121,18 @@ export default function WorkspaceChats() {
} z-20 w-fit rounded-lg absolute top-full right-0 bg-sidebar p-4 flex items-center justify-center mt-2`} } z-20 w-fit rounded-lg absolute top-full right-0 bg-sidebar p-4 flex items-center justify-center mt-2`}
> >
<div className="flex flex-col gap-y-2"> <div className="flex flex-col gap-y-2">
{Object.keys(exportOptions) {Object.entries(exportOptions)
.filter((type) => type !== exportType) .filter(([type, _]) => type !== exportType)
.map((type) => ( .map(([key, data]) => (
<button <button
key={type} key={key}
onClick={() => { onClick={() => {
setExportType(type); setExportType(key);
setShowMenu(false); setShowMenu(false);
}} }}
className="text-white hover:bg-slate-200/20 w-full text-left px-4 py-1.5 rounded-md" className="text-white hover:bg-slate-200/20 w-full text-left px-4 py-1.5 rounded-md"
> >
{type.toUpperCase()} {data.name}
</button> </button>
))} ))}
</div> </div>

View File

@ -1417,6 +1417,11 @@ file-entry-cache@^6.0.1:
dependencies: dependencies:
flat-cache "^3.0.4" flat-cache "^3.0.4"
file-saver@^2.0.5:
version "2.0.5"
resolved "https://registry.yarnpkg.com/file-saver/-/file-saver-2.0.5.tgz#d61cfe2ce059f414d899e9dd6d4107ee25670c38"
integrity sha512-P9bmyZ3h/PRG+Nzga+rbdI4OEpNDzAVyy74uVO9ATgzLK6VtAsYybF/+TOCvrc0MO793d6+42lLyZTw7/ArVzA==
file-selector@^0.6.0: file-selector@^0.6.0:
version "0.6.0" version "0.6.0"
resolved "https://registry.yarnpkg.com/file-selector/-/file-selector-0.6.0.tgz#fa0a8d9007b829504db4d07dd4de0310b65287dc" resolved "https://registry.yarnpkg.com/file-selector/-/file-selector-0.6.0.tgz#fa0a8d9007b829504db4d07dd4de0310b65287dc"

View File

@ -4,7 +4,6 @@
const { Workspace } = require("../../../models/workspace"); const { Workspace } = require("../../../models/workspace");
const { WorkspaceChats } = require("../../../models/workspaceChats"); const { WorkspaceChats } = require("../../../models/workspaceChats");
// Todo: add RLHF feedbackScore field support
async function convertToCSV(preparedData) { async function convertToCSV(preparedData) {
const rows = ["id,username,workspace,prompt,response,sent_at,rating"]; const rows = ["id,username,workspace,prompt,response,sent_at,rating"];
for (const item of preparedData) { for (const item of preparedData) {
@ -27,7 +26,12 @@ async function convertToJSON(workspaceChatsMap) {
[], [],
Object.values(workspaceChatsMap).map((workspace) => workspace.messages) Object.values(workspaceChatsMap).map((workspace) => workspace.messages)
); );
return JSON.stringify(allMessages); return JSON.stringify(allMessages, null, 4);
}
// ref: https://raw.githubusercontent.com/gururise/AlpacaDataCleaned/main/alpaca_data.json
/**
 * Serializes the given export data to a JSON string, pretty-printed with
 * 4-space indentation.
 *
 * NOTE(review): presumably receives the array of
 * { instruction, input, output } records built for the "jsonAlpaca" format;
 * confirm against the caller.
 *
 * @param {*} preparedData - Value to serialize; handed directly to JSON.stringify.
 * @returns {Promise<string>} The serialized JSON text (wrapped in a promise because the function is async).
 */
async function convertToJSONAlpaca(preparedData) {
return JSON.stringify(preparedData, null, 4);
} }
async function convertToJSONL(workspaceChatsMap) { async function convertToJSONL(workspaceChatsMap) {
@ -66,6 +70,19 @@ async function prepareWorkspaceChatsForExport(format = "jsonl") {
return preparedData; return preparedData;
} }
if (format === "jsonAlpaca") {
const preparedData = chats.map((chat) => {
const responseJson = JSON.parse(chat.response);
return {
instruction: chat.prompt,
input: "",
output: responseJson.text,
};
});
return preparedData;
}
const workspaceIds = [...new Set(chats.map((chat) => chat.workspaceId))]; const workspaceIds = [...new Set(chats.map((chat) => chat.workspaceId))];
const workspacesWithPrompts = await Promise.all( const workspacesWithPrompts = await Promise.all(
@ -124,6 +141,10 @@ const exportMap = {
contentType: "application/jsonl", contentType: "application/jsonl",
func: convertToJSONL, func: convertToJSONL,
}, },
jsonAlpaca: {
contentType: "application/json",
func: convertToJSONAlpaca,
},
}; };
function escapeCsv(str) { function escapeCsv(str) {