Change the Alpaca export format to include cited context sources and the workspace system prompt

This commit is contained in:
timothycarambat 2024-07-15 16:05:53 -07:00
parent d037ab27f4
commit 86a66ba569

View File

@ -3,6 +3,7 @@
const { Workspace } = require("../../../models/workspace"); const { Workspace } = require("../../../models/workspace");
const { WorkspaceChats } = require("../../../models/workspaceChats"); const { WorkspaceChats } = require("../../../models/workspaceChats");
const { safeJsonParse } = require("../../http");
async function convertToCSV(preparedData) { async function convertToCSV(preparedData) {
const rows = ["id,username,workspace,prompt,response,sent_at,rating"]; const rows = ["id,username,workspace,prompt,response,sent_at,rating"];
@ -66,12 +67,24 @@ async function prepareWorkspaceChatsForExport(format = "jsonl") {
return preparedData; return preparedData;
} }
const workspaceIds = [...new Set(chats.map((chat) => chat.workspaceId))];
const workspacesWithPrompts = await Promise.all(
workspaceIds.map((id) => Workspace.get({ id: Number(id) }))
);
const workspacePromptsMap = workspacesWithPrompts.reduce((acc, workspace) => {
acc[workspace.id] = workspace.openAiPrompt;
return acc;
}, {});
if (format === "jsonAlpaca") { if (format === "jsonAlpaca") {
const preparedData = chats.map((chat) => { const preparedData = chats.map((chat) => {
const responseJson = JSON.parse(chat.response); const responseJson = JSON.parse(chat.response);
return { return {
instruction: chat.prompt, instruction: buildSystemPrompt(
input: "", chat,
workspacePromptsMap[chat.workspaceId]
),
input: chat.prompt,
output: responseJson.text, output: responseJson.text,
}; };
}); });
@ -79,17 +92,6 @@ async function prepareWorkspaceChatsForExport(format = "jsonl") {
return preparedData; return preparedData;
} }
const workspaceIds = [...new Set(chats.map((chat) => chat.workspaceId))];
const workspacesWithPrompts = await Promise.all(
workspaceIds.map((id) => Workspace.get({ id: Number(id) }))
);
const workspacePromptsMap = workspacesWithPrompts.reduce((acc, workspace) => {
acc[workspace.id] = workspace.openAiPrompt;
return acc;
}, {});
const workspaceChatsMap = chats.reduce((acc, chat) => { const workspaceChatsMap = chats.reduce((acc, chat) => {
const { prompt, response, workspaceId } = chat; const { prompt, response, workspaceId } = chat;
const responseJson = JSON.parse(response); const responseJson = JSON.parse(response);
@ -157,6 +159,23 @@ async function exportChatsAsType(workspaceChatsMap, format = "jsonl") {
}; };
} }
const STANDARD_PROMPT =
  "Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.";

// Assembles the system prompt for an exported chat record: the workspace's
// own prompt (falling back to STANDARD_PROMPT when none is set) followed by
// any cited context snippets stored in the chat's serialized response.
function buildSystemPrompt(chat, prompt = null) {
  // NOTE(review): assumes each source carries a `text` field — verify
  // against the shape produced by the chat pipeline.
  const sources = safeJsonParse(chat.response)?.sources || [];
  let context = "";
  if (sources.length > 0) {
    const sections = sources.map(
      (source, i) => `[CONTEXT ${i}]:\n${source.text}\n[END CONTEXT ${i}]\n\n`
    );
    context = "\nContext:\n" + sections.join("");
  }
  return `${prompt ?? STANDARD_PROMPT}${context}`;
}
module.exports = { module.exports = {
prepareWorkspaceChatsForExport, prepareWorkspaceChatsForExport,
exportChatsAsType, exportChatsAsType,