[FEAT] Improved CSV chat exports (#700)

* add more fields to the CSV export to make it more useful

* refactor from review comments

* fix escapeCsv function

* catch export errors properly

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2024-02-13 10:12:59 -08:00 committed by GitHub
parent 0e6bd030e9
commit 1b29882c71
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 15 deletions

View File

@ -465,7 +465,10 @@ const System = {
method: "GET",
headers: baseHeaders(),
})
.then((res) => res.text())
.then((res) => {
if (res.ok) return res.text();
throw new Error(res.statusText);
})
.catch((e) => {
console.error(e);
return null;

View File

@ -21,7 +21,7 @@ export default function WorkspaceChats() {
};
const handleDumpChats = async () => {
const chats = await System.exportChats(exportType);
if (chats) {
if (!!chats) {
const { mimeType, fileExtension } = exportOptions[exportType];
const blob = new Blob([chats], { type: mimeType });
const link = document.createElement("a");

View File

@ -898,7 +898,7 @@ function systemEndpoints(app) {
async (request, response) => {
try {
const { type = "jsonl" } = request.query;
const chats = await prepareWorkspaceChatsForExport();
const chats = await prepareWorkspaceChatsForExport(type);
const { contentType, data } = await exportChatsAsType(chats, type);
await EventLogs.logEvent(
"exported_chats",

View File

@ -4,17 +4,19 @@
const { Workspace } = require("../../../models/workspace");
const { WorkspaceChats } = require("../../../models/workspaceChats");
// Todo: make this more useful for export by adding other columns about workspace, user, time, etc for post-filtering.
async function convertToCSV(workspaceChatsMap) {
const rows = ["role,content"];
for (const workspaceChats of Object.values(workspaceChatsMap)) {
for (const message of workspaceChats.messages) {
// Escape double quotes and wrap content in double quotes
const escapedContent = `"${message.content
.replace(/"/g, '""')
.replace(/\n/g, " ")}"`;
rows.push(`${message.role},${escapedContent}`);
}
// Todo: add RLHF feedbackScore field support
/**
 * Serialize prepared chat records into CSV text.
 *
 * The output is a fixed header line followed by one line per record, all
 * joined with "\n". The four text columns are passed through `escapeCsv`;
 * `id` and `sent_at` are written as-is.
 *
 * @param {Iterable<object>} preparedData - Records exposing `id`, `username`,
 *   `workspace`, `prompt`, `response` and `sent_at`.
 * @returns {Promise<string>} The CSV document (no trailing newline).
 */
async function convertToCSV(preparedData) {
const header = "id,username,workspace,prompt,response,sent_at";
// Array#join is used on purpose: it renders null/undefined cells as empty strings.
const toLine = (entry) => {
const cells = [
entry.id,
escapeCsv(entry.username),
escapeCsv(entry.workspace),
escapeCsv(entry.prompt),
escapeCsv(entry.response),
entry.sent_at,
];
return cells.join(",");
};
const body = [...preparedData].map((entry) => toLine(entry));
return [header, ...body].join("\n");
}
@ -33,10 +35,30 @@ async function convertToJSONL(workspaceChatsMap) {
.join("\n");
}
async function prepareWorkspaceChatsForExport() {
async function prepareWorkspaceChatsForExport(format = "jsonl") {
if (!exportMap.hasOwnProperty(format))
throw new Error("Invalid export type.");
const chats = await WorkspaceChats.whereWithData({}, null, null, {
id: "asc",
});
if (format === "csv") {
const preparedData = chats.map((chat) => {
const responseJson = JSON.parse(chat.response);
return {
id: chat.id,
username: chat.user ? chat.user.username : "unknown user",
workspace: chat.workspace ? chat.workspace.name : "unknown workspace",
prompt: chat.prompt,
response: responseJson.text,
sent_at: chat.createdAt,
};
});
return preparedData;
}
const workspaceIds = [...new Set(chats.map((chat) => chat.workspaceId))];
const workspacesWithPrompts = await Promise.all(
@ -97,6 +119,10 @@ const exportMap = {
},
};
/**
 * Quote a single value so it can be written as one CSV field.
 *
 * The value is coerced to a string, embedded double quotes are doubled,
 * every line break (CRLF, CR or LF) is flattened to a single space so a
 * record stays on one line, and the result is wrapped in double quotes.
 *
 * @param {*} str - Value to escape. `null`/`undefined` produce an empty
 *   quoted field; other non-string values are stringified.
 * @returns {string} The quoted CSV field.
 */
function escapeCsv(str) {
// Guard against missing values (e.g. a parsed response with no `text`):
// calling `.replace` on them would throw and abort the whole export.
const text = str == null ? "" : String(str);
return `"${text.replace(/"/g, '""').replace(/\r\n|\r|\n/g, " ")}"`;
}
async function exportChatsAsType(workspaceChatsMap, format = "jsonl") {
const { contentType, func } = exportMap.hasOwnProperty(format)
? exportMap[format]