From 1f96b837b3883f2958d0a4845b15e835c7e53319 Mon Sep 17 00:00:00 2001
From: Timothy Carambat
Date: Wed, 21 Aug 2024 14:47:06 -0700
Subject: [PATCH] Refactor api endpoint chat handler to its own function
 (#2157)

remove legacy `chatWithWorkspace` and clean up `index.js`
---
 .vscode/settings.json                         |   1 +
 server/endpoints/api/workspace/index.js       |  37 +-
 server/endpoints/api/workspaceThread/index.js |  14 +-
 server/utils/chats/apiChatHandler.js          | 481 ++++++++++++++++++
 server/utils/chats/index.js                   | 212 --------
 5 files changed, 514 insertions(+), 231 deletions(-)
 create mode 100644 server/utils/chats/apiChatHandler.js

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 3fcc79cd5..549fd1574 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -43,6 +43,7 @@
     "searxng",
     "Serper",
     "Serply",
+    "streamable",
     "textgenwebui",
     "togetherai",
     "Unembed",
diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js
index c9a6cb51e..1fe9ad8dc 100644
--- a/server/endpoints/api/workspace/index.js
+++ b/server/endpoints/api/workspace/index.js
@@ -4,19 +4,16 @@ const { Telemetry } = require("../../../models/telemetry");
 const { DocumentVectors } = require("../../../models/vectors");
 const { Workspace } = require("../../../models/workspace");
 const { WorkspaceChats } = require("../../../models/workspaceChats");
-const { chatWithWorkspace } = require("../../../utils/chats");
 const { getVectorDbClass } = require("../../../utils/helpers");
 const { multiUserMode, reqBody } = require("../../../utils/http");
 const { validApiKey } = require("../../../utils/middleware/validApiKey");
-const {
-  streamChatWithWorkspace,
-  VALID_CHAT_MODE,
-} = require("../../../utils/chats/stream");
+const { VALID_CHAT_MODE } = require("../../../utils/chats/stream");
 const { EventLogs } = require("../../../models/eventLogs");
 const {
   convertToChatHistory,
   writeResponseChunk,
 } = require("../../../utils/helpers/chat/responses");
+const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");
 
 function apiWorkspaceEndpoints(app) {
   if (!app) return;
@@ -584,7 +581,7 @@ function apiWorkspaceEndpoints(app) {
     try {
       const { slug } = request.params;
       const { message, mode = "query" } = reqBody(request);
-      const workspace = await Workspace.get({ slug });
+      const workspace = await Workspace.get({ slug: String(slug) });
 
       if (!workspace) {
         response.status(400).json({
@@ -612,9 +609,17 @@ function apiWorkspaceEndpoints(app) {
         return;
       }
 
-      const result = await chatWithWorkspace(workspace, message, mode);
+      const result = await ApiChatHandler.chatSync({
+        workspace,
+        message,
+        mode,
+        user: null,
+        thread: null,
+      });
+
       await Telemetry.sendTelemetry("sent_chat", {
-        LLMSelection: process.env.LLM_PROVIDER || "openai",
+        LLMSelection:
+          workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai",
         Embedder: process.env.EMBEDDING_ENGINE || "inherit",
         VectorDbSelection: process.env.VECTOR_DB || "lancedb",
         TTSSelection: process.env.TTS_PROVIDER || "native",
@@ -623,7 +628,7 @@ function apiWorkspaceEndpoints(app) {
         workspaceName: workspace?.name,
        chatModel: workspace?.chatModel || "System Default",
       });
-      response.status(200).json({ ...result });
+      return response.status(200).json({ ...result });
     } catch (e) {
       console.error(e.message, e);
       response.status(500).json({
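For orientation, this is roughly how a client exercises the refactored non-streaming endpoint above. The route path, port, and API key are not part of this diff and are assumptions (Node 18+ global `fetch` is also assumed); the JSON reply follows the `ResponseObject` typedef introduced in `apiChatHandler.js` further down.

```js
// Hypothetical caller of the non-streaming developer API chat endpoint.
// Route, port, and API key are assumptions not shown in this diff.
(async () => {
  const response = await fetch(
    "http://localhost:3001/api/v1/workspace/my-workspace/chat",
    {
      method: "POST",
      headers: {
        Authorization: `Bearer ${process.env.ANYTHINGLLM_API_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ message: "What documents mention pricing?", mode: "query" }),
    }
  );
  const result = await response.json();
  // Shaped like the ResponseObject typedef added below:
  // { id, type, textResponse, sources, close, error, chatId? }
  console.log(result.textResponse, result.sources.length);
})();
```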
"openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", @@ -623,7 +628,7 @@ function apiWorkspaceEndpoints(app) { workspaceName: workspace?.name, chatModel: workspace?.chatModel || "System Default", }); - response.status(200).json({ ...result }); + return response.status(200).json({ ...result }); } catch (e) { console.error(e.message, e); response.status(500).json({ @@ -702,7 +707,7 @@ function apiWorkspaceEndpoints(app) { try { const { slug } = request.params; const { message, mode = "query" } = reqBody(request); - const workspace = await Workspace.get({ slug }); + const workspace = await Workspace.get({ slug: String(slug) }); if (!workspace) { response.status(400).json({ @@ -736,9 +741,17 @@ function apiWorkspaceEndpoints(app) { response.setHeader("Connection", "keep-alive"); response.flushHeaders(); - await streamChatWithWorkspace(response, workspace, message, mode); + await ApiChatHandler.streamChat({ + response, + workspace, + message, + mode, + user: null, + thread: null, + }); await Telemetry.sendTelemetry("sent_chat", { - LLMSelection: process.env.LLM_PROVIDER || "openai", + LLMSelection: + workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js index de30e24d0..cdc4d598c 100644 --- a/server/endpoints/api/workspaceThread/index.js +++ b/server/endpoints/api/workspaceThread/index.js @@ -3,7 +3,6 @@ const { WorkspaceThread } = require("../../../models/workspaceThread"); const { Workspace } = require("../../../models/workspace"); const { validApiKey } = require("../../../utils/middleware/validApiKey"); const { reqBody, multiUserMode } = require("../../../utils/http"); -const { chatWithWorkspace } = require("../../../utils/chats"); const { streamChatWithWorkspace, VALID_CHAT_MODE, @@ -16,6 +15,7 @@ const { } = require("../../../utils/helpers/chat/responses"); const { WorkspaceChats } = require("../../../models/workspaceChats"); const { User } = require("../../../models/user"); +const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler"); function apiWorkspaceThreadEndpoints(app) { if (!app) return; @@ -405,13 +405,13 @@ function apiWorkspaceThreadEndpoints(app) { } const user = userId ? 
diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js
index de30e24d0..cdc4d598c 100644
--- a/server/endpoints/api/workspaceThread/index.js
+++ b/server/endpoints/api/workspaceThread/index.js
@@ -3,7 +3,6 @@ const { WorkspaceThread } = require("../../../models/workspaceThread");
 const { Workspace } = require("../../../models/workspace");
 const { validApiKey } = require("../../../utils/middleware/validApiKey");
 const { reqBody, multiUserMode } = require("../../../utils/http");
-const { chatWithWorkspace } = require("../../../utils/chats");
 const {
   streamChatWithWorkspace,
   VALID_CHAT_MODE,
@@ -16,6 +15,7 @@ const {
 } = require("../../../utils/helpers/chat/responses");
 const { WorkspaceChats } = require("../../../models/workspaceChats");
 const { User } = require("../../../models/user");
+const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");
 
 function apiWorkspaceThreadEndpoints(app) {
   if (!app) return;
@@ -405,13 +405,13 @@ function apiWorkspaceThreadEndpoints(app) {
       }
 
       const user = userId ? await User.get({ id: Number(userId) }) : null;
-      const result = await chatWithWorkspace(
+      const result = await ApiChatHandler.chatSync({
         workspace,
         message,
         mode,
         user,
-        thread
-      );
+        thread,
+      });
       await Telemetry.sendTelemetry("sent_chat", {
         LLMSelection: process.env.LLM_PROVIDER || "openai",
         Embedder: process.env.EMBEDDING_ENGINE || "inherit",
@@ -556,14 +556,14 @@ function apiWorkspaceThreadEndpoints(app) {
       response.setHeader("Connection", "keep-alive");
       response.flushHeaders();
 
-      await streamChatWithWorkspace(
+      await ApiChatHandler.streamChat({
         response,
         workspace,
         message,
         mode,
         user,
-        thread
-      );
+        thread,
+      });
       await Telemetry.sendTelemetry("sent_chat", {
         LLMSelection: process.env.LLM_PROVIDER || "openai",
         Embedder: process.env.EMBEDDING_ENGINE || "inherit",
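The thread endpoints make the same swap but forward `user` and `thread`. A hedged sketch of that direct call path; the model lookup clauses and require paths are illustrative and not taken from this diff.

```js
const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler"); // path as used above
const { WorkspaceThread } = require("../../../models/workspaceThread");
const { User } = require("../../../models/user");

// Hypothetical helper showing the thread-scoped call; lookup clauses are illustrative.
async function threadChatExample(workspace, threadSlug, userId, message) {
  const user = userId ? await User.get({ id: Number(userId) }) : null;
  const thread = await WorkspaceThread.get({ slug: String(threadSlug) }); // clause assumed
  return await ApiChatHandler.chatSync({
    workspace,
    message,
    mode: "chat",
    user,
    thread, // thread?.id ends up as threadId on the stored WorkspaceChats row
  });
}
```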
diff --git a/server/utils/chats/apiChatHandler.js b/server/utils/chats/apiChatHandler.js
new file mode 100644
index 000000000..a52e2da14
--- /dev/null
+++ b/server/utils/chats/apiChatHandler.js
@@ -0,0 +1,481 @@
+const { v4: uuidv4 } = require("uuid");
+const { DocumentManager } = require("../DocumentManager");
+const { WorkspaceChats } = require("../../models/workspaceChats");
+const { getVectorDbClass, getLLMProvider } = require("../helpers");
+const { writeResponseChunk } = require("../helpers/chat/responses");
+const { chatPrompt, sourceIdentifier, recentChatHistory } = require("./index");
+
+/**
+ * @typedef ResponseObject
+ * @property {string} id - uuid of response
+ * @property {string} type - Type of response
+ * @property {string|null} textResponse - full text response
+ * @property {object[]} sources
+ * @property {boolean} close
+ * @property {string|null} error
+ */
+
+/**
+ * Handle synchronous chats with your workspace via the developer API endpoint
+ * @param {{
+ *  workspace: import("@prisma/client").workspaces,
+ *  message:string,
+ *  mode: "chat"|"query",
+ *  user: import("@prisma/client").users|null,
+ *  thread: import("@prisma/client").workspace_threads|null,
+ * }} parameters
+ * @returns {Promise<ResponseObject>}
+ */
+async function chatSync({
+  workspace,
+  message = null,
+  mode = "chat",
+  user = null,
+  thread = null,
+}) {
+  const uuid = uuidv4();
+  const chatMode = mode ?? "chat";
+  const LLMConnector = getLLMProvider({
+    provider: workspace?.chatProvider,
+    model: workspace?.chatModel,
+  });
+  const VectorDb = getVectorDbClass();
+  const messageLimit = workspace?.openAiHistory || 20;
+  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
+  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
+
+  // User is trying to query-mode chat a workspace that has no data in it - so
+  // we should exit early as no information can be found under these conditions.
+  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
+    const textResponse =
+      workspace?.queryRefusalResponse ??
+      "There is no relevant information in this workspace to answer your query.";
+
+    await WorkspaceChats.new({
+      workspaceId: workspace.id,
+      prompt: String(message),
+      response: {
+        text: textResponse,
+        sources: [],
+        type: chatMode,
+      },
+      include: false,
+    });
+
+    return {
+      id: uuid,
+      type: "textResponse",
+      sources: [],
+      close: true,
+      error: null,
+      textResponse,
+    };
+  }
+
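For reference, the early return above resolves to (and persists) a refusal-shaped `ResponseObject`, roughly:

```js
// Illustrative only: the shape handed back (and stored) by the early exit above.
const workspaceQueryRefusalResponse = null; // stands in for workspace?.queryRefusalResponse
const refusalReply = {
  id: "1f0e...", // uuidv4()
  type: "textResponse",
  sources: [], // nothing was retrieved
  close: true,
  error: null,
  textResponse:
    workspaceQueryRefusalResponse ??
    "There is no relevant information in this workspace to answer your query.",
};
console.log(refusalReply.textResponse);
```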
+  // If we are here we know that we are in a workspace that is:
+  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
+  // 2. Chatting in "query" mode and has at least 1 embedding
+  let contextTexts = [];
+  let sources = [];
+  let pinnedDocIdentifiers = [];
+  const { rawHistory, chatHistory } = await recentChatHistory({
+    user,
+    workspace,
+    thread,
+    messageLimit,
+    chatMode,
+  });
+
+  await new DocumentManager({
+    workspace,
+    maxTokens: LLMConnector.promptWindowLimit(),
+  })
+    .pinnedDocs()
+    .then((pinnedDocs) => {
+      pinnedDocs.forEach((doc) => {
+        const { pageContent, ...metadata } = doc;
+        pinnedDocIdentifiers.push(sourceIdentifier(doc));
+        contextTexts.push(doc.pageContent);
+        sources.push({
+          text:
+            pageContent.slice(0, 1_000) +
+            "...continued on in source document...",
+          ...metadata,
+        });
+      });
+    });
+
+  const vectorSearchResults =
+    embeddingsCount !== 0
+      ? await VectorDb.performSimilaritySearch({
+          namespace: workspace.slug,
+          input: message,
+          LLMConnector,
+          similarityThreshold: workspace?.similarityThreshold,
+          topN: workspace?.topN,
+          filterIdentifiers: pinnedDocIdentifiers,
+        })
+      : {
+          contextTexts: [],
+          sources: [],
+          message: null,
+        };
+
+  // Abort if a similarity search was attempted and it failed.
+  if (!!vectorSearchResults.message) {
+    return {
+      id: uuid,
+      type: "abort",
+      textResponse: null,
+      sources: [],
+      close: true,
+      error: vectorSearchResults.message,
+    };
+  }
+
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get current search?
+  // This is to give the ability of the LLM to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant
+  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question - that is visible in the history so it logically makes sense
+  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
+  sources = [...sources, ...vectorSearchResults.sources];
+
+  // If in query mode and no context chunks are found from search, backfill, or pins - do not
+  // let the LLM try to hallucinate a response or use general knowledge and exit early
+  if (chatMode === "query" && contextTexts.length === 0) {
+    const textResponse =
+      workspace?.queryRefusalResponse ??
+      "There is no relevant information in this workspace to answer your query.";
+
+    await WorkspaceChats.new({
+      workspaceId: workspace.id,
+      prompt: message,
+      response: {
+        text: textResponse,
+        sources: [],
+        type: chatMode,
+      },
+      threadId: thread?.id || null,
+      include: false,
+      user,
+    });
+
+    return {
+      id: uuid,
+      type: "textResponse",
+      sources: [],
+      close: true,
+      error: null,
+      textResponse,
+    };
+  }
+
+  // Compress & Assemble message to ensure prompt passes token limit with room for response
+  // and build system messages based on inputs and history.
+  const messages = await LLMConnector.compressMessages(
+    {
+      systemPrompt: chatPrompt(workspace),
+      userPrompt: message,
+      contextTexts,
+      chatHistory,
+    },
+    rawHistory
+  );
+
+  // Send the text completion.
+  const textResponse = await LLMConnector.getChatCompletion(messages, {
+    temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
+  });
+
+  if (!textResponse) {
+    return {
+      id: uuid,
+      type: "abort",
+      textResponse: null,
+      sources: [],
+      close: true,
+      error: "No text completion could be completed with this input.",
+    };
+  }
+
+  const { chat } = await WorkspaceChats.new({
+    workspaceId: workspace.id,
+    prompt: message,
+    response: { text: textResponse, sources, type: chatMode },
+    threadId: thread?.id || null,
+    user,
+  });
+
+  return {
+    id: uuid,
+    type: "textResponse",
+    close: true,
+    error: null,
+    chatId: chat.id,
+    textResponse,
+    sources,
+  };
+}
+
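The `contextTexts` vs. `sources` comment in `chatSync` above is easier to see with toy data: the prompt context and the user-facing citations intentionally diverge once history is backfilled.

```js
// Toy illustration (not project code) of the contextTexts vs. sources split described above.
const pinnedTexts = ["full text of a pinned document"];
const currentHits = [{ title: "guide.pdf", text: "chunk returned by this search" }];
const backfilledFromHistory = ["chunk that was cited two turns ago"];

const contextTexts = [
  ...pinnedTexts,
  ...currentHits.map((s) => s.text),
  ...backfilledFromHistory, // the LLM still sees it...
];
const sources = [...currentHits]; // ...but it is not re-cited to the user
console.log(contextTexts.length, sources.length); // 3 1
```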
+/**
+ * Handle streamable HTTP chunks for chats with your workspace via the developer API endpoint
+ * @param {{
+ *  response: import("express").Response,
+ *  workspace: import("@prisma/client").workspaces,
+ *  message:string,
+ *  mode: "chat"|"query",
+ *  user: import("@prisma/client").users|null,
+ *  thread: import("@prisma/client").workspace_threads|null,
+ * }} parameters
+ * @returns {Promise}
+ */
+async function streamChat({
+  response,
+  workspace,
+  message = null,
+  mode = "chat",
+  user = null,
+  thread = null,
+}) {
+  const uuid = uuidv4();
+  const chatMode = mode ?? "chat";
+  const LLMConnector = getLLMProvider({
+    provider: workspace?.chatProvider,
+    model: workspace?.chatModel,
+  });
+
+  const VectorDb = getVectorDbClass();
+  const messageLimit = workspace?.openAiHistory || 20;
+  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
+  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
+
+  // User is trying to query-mode chat a workspace that has no data in it - so
+  // we should exit early as no information can be found under these conditions.
+  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
+    const textResponse =
+      workspace?.queryRefusalResponse ??
+      "There is no relevant information in this workspace to answer your query.";
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse,
+      sources: [],
+      attachments: [],
+      close: true,
+      error: null,
+    });
+    await WorkspaceChats.new({
+      workspaceId: workspace.id,
+      prompt: message,
+      response: {
+        text: textResponse,
+        sources: [],
+        type: chatMode,
+        attachments: [],
+      },
+      threadId: thread?.id || null,
+      include: false,
+      user,
+    });
+    return;
+  }
+
+  // If we are here we know that we are in a workspace that is:
+  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
+  // 2. Chatting in "query" mode and has at least 1 embedding
+  let completeText;
+  let contextTexts = [];
+  let sources = [];
+  let pinnedDocIdentifiers = [];
+  const { rawHistory, chatHistory } = await recentChatHistory({
+    user,
+    workspace,
+    thread,
+    messageLimit,
+  });
+
+  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
+  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // However we limit the maximum of appended context to 80% of its overall size, mostly because if it expands beyond this
+  // it will undergo prompt compression anyway to make it work. If there is so much pinned that the context here is bigger than
+  // what the model can support - it would get compressed anyway and that really is not the point of pinning. It is really best
+  // suited for high-context models.
+  await new DocumentManager({
+    workspace,
+    maxTokens: LLMConnector.promptWindowLimit(),
+  })
+    .pinnedDocs()
+    .then((pinnedDocs) => {
+      pinnedDocs.forEach((doc) => {
+        const { pageContent, ...metadata } = doc;
+        pinnedDocIdentifiers.push(sourceIdentifier(doc));
+        contextTexts.push(doc.pageContent);
+        sources.push({
+          text:
+            pageContent.slice(0, 1_000) +
+            "...continued on in source document...",
+          ...metadata,
+        });
+      });
+    });
+
+  const vectorSearchResults =
+    embeddingsCount !== 0
+      ? await VectorDb.performSimilaritySearch({
+          namespace: workspace.slug,
+          input: message,
+          LLMConnector,
+          similarityThreshold: workspace?.similarityThreshold,
+          topN: workspace?.topN,
+          filterIdentifiers: pinnedDocIdentifiers,
+        })
+      : {
+          contextTexts: [],
+          sources: [],
+          message: null,
+        };
+
+  // Abort if a similarity search was attempted and it failed.
+  if (!!vectorSearchResults.message) {
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "abort",
+      textResponse: null,
+      sources: [],
+      close: true,
+      error: vectorSearchResults.message,
+    });
+    return;
+  }
+
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get current search?
+  // This is to give the ability of the LLM to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant
+  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question - that is visible in the history so it logically makes sense
+  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
+  sources = [...sources, ...vectorSearchResults.sources];
+
+  // If in query mode and no context chunks are found from search, backfill, or pins - do not
+  // let the LLM try to hallucinate a response or use general knowledge and exit early
+  if (chatMode === "query" && contextTexts.length === 0) {
+    const textResponse =
+      workspace?.queryRefusalResponse ??
+      "There is no relevant information in this workspace to answer your query.";
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse,
+      sources: [],
+      close: true,
+      error: null,
+    });
+
+    await WorkspaceChats.new({
+      workspaceId: workspace.id,
+      prompt: message,
+      response: {
+        text: textResponse,
+        sources: [],
+        type: chatMode,
+        attachments: [],
+      },
+      threadId: thread?.id || null,
+      include: false,
+      user,
+    });
+    return;
+  }
+
+  // Compress & Assemble message to ensure prompt passes token limit with room for response
+  // and build system messages based on inputs and history.
+  const messages = await LLMConnector.compressMessages(
+    {
+      systemPrompt: chatPrompt(workspace),
+      userPrompt: message,
+      contextTexts,
+      chatHistory,
+    },
+    rawHistory
+  );
+
+  // If streaming is not explicitly enabled for connector
+  // we do regular waiting of a response and send a single chunk.
+  if (LLMConnector.streamingEnabled() !== true) {
+    console.log(
+      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
+    );
+    completeText = await LLMConnector.getChatCompletion(messages, {
+      temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
+    });
+    writeResponseChunk(response, {
+      uuid,
+      sources,
+      type: "textResponseChunk",
+      textResponse: completeText,
+      close: true,
+      error: false,
+    });
+  } else {
+    const stream = await LLMConnector.streamGetChatCompletion(messages, {
+      temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
+    });
+    completeText = await LLMConnector.handleStream(response, stream, {
+      uuid,
+      sources,
+    });
+  }
+
+  if (completeText?.length > 0) {
+    const { chat } = await WorkspaceChats.new({
+      workspaceId: workspace.id,
+      prompt: message,
+      response: { text: completeText, sources, type: chatMode },
+      threadId: thread?.id || null,
+      user,
+    });
+
+    writeResponseChunk(response, {
+      uuid,
+      type: "finalizeResponseStream",
+      close: true,
+      error: false,
+      chatId: chat.id,
+    });
+    return;
+  }
+
+  writeResponseChunk(response, {
+    uuid,
+    type: "finalizeResponseStream",
+    close: true,
+    error: false,
+  });
+  return;
+}
+
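A consumer of this stream only has to understand the chunk objects written above (`textResponse`, `abort`, `textResponseChunk`, `finalizeResponseStream`). A toy accumulator, deliberately ignoring the transport, which is `writeResponseChunk`'s concern and outside this diff:

```js
// Toy accumulator for the chunk objects streamChat emits; how they arrive on the wire is out of scope.
function accumulateStreamChunks(chunks) {
  let text = "";
  let chatId = null;
  const sources = [];
  for (const chunk of chunks) {
    if (chunk.error) throw new Error(String(chunk.error)); // "abort" chunks carry a message here
    if (chunk.type === "textResponse" || chunk.type === "textResponseChunk") {
      text += chunk.textResponse ?? "";
      if (Array.isArray(chunk.sources)) sources.push(...chunk.sources);
    }
    if (chunk.type === "finalizeResponseStream") chatId = chunk.chatId ?? null;
  }
  return { text, sources, chatId };
}
```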
+module.exports.ApiChatHandler = {
+  chatSync,
+  streamChat,
+};
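The removal below is the mechanical half of the refactor: every former positional `chatWithWorkspace(...)` call becomes an options-object call into the handler exported above, e.g.:

```js
// Call-site migration in a nutshell (identifiers stand in for the endpoint's local variables).
async function beforeAfterExample({ workspace, message, mode, user, thread }) {
  // Before - positional arguments into utils/chats/index.js (removed below):
  // const result = await chatWithWorkspace(workspace, message, mode, user, thread);

  // After - one options object into the new handler:
  const { ApiChatHandler } = require("../utils/chats/apiChatHandler"); // path assumed from an endpoint file
  return await ApiChatHandler.chatSync({ workspace, message, mode, user, thread });
}
```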
- "There is no relevant information in this workspace to answer your query."; - - await WorkspaceChats.new({ - workspaceId: workspace.id, - prompt: message, - response: { - text: textResponse, - sources: [], - type: chatMode, - }, - threadId: thread?.id || null, - include: false, - user, - }); - - return { - id: uuid, - type: "textResponse", - sources: [], - close: true, - error: null, - textResponse, - }; - } - - // If we are here we know that we are in a workspace that is: - // 1. Chatting in "chat" mode and may or may _not_ have embeddings - // 2. Chatting in "query" mode and has at least 1 embedding - let contextTexts = []; - let sources = []; - let pinnedDocIdentifiers = []; - const { rawHistory, chatHistory } = await recentChatHistory({ - user, - workspace, - thread, - messageLimit, - chatMode, - }); - - // See stream.js comment for more information on this implementation. - await new DocumentManager({ - workspace, - maxTokens: LLMConnector.promptWindowLimit(), - }) - .pinnedDocs() - .then((pinnedDocs) => { - pinnedDocs.forEach((doc) => { - const { pageContent, ...metadata } = doc; - pinnedDocIdentifiers.push(sourceIdentifier(doc)); - contextTexts.push(doc.pageContent); - sources.push({ - text: - pageContent.slice(0, 1_000) + - "...continued on in source document...", - ...metadata, - }); - }); - }); - - const vectorSearchResults = - embeddingsCount !== 0 - ? await VectorDb.performSimilaritySearch({ - namespace: workspace.slug, - input: message, - LLMConnector, - similarityThreshold: workspace?.similarityThreshold, - topN: workspace?.topN, - filterIdentifiers: pinnedDocIdentifiers, - }) - : { - contextTexts: [], - sources: [], - message: null, - }; - - // Failed similarity search if it was run at all and failed. - if (!!vectorSearchResults.message) { - return { - id: uuid, - type: "abort", - textResponse: null, - sources: [], - close: true, - error: vectorSearchResults.message, - }; - } - - const { fillSourceWindow } = require("../helpers/chat"); - const filledSources = fillSourceWindow({ - nDocs: workspace?.topN || 4, - searchResults: vectorSearchResults.sources, - history: rawHistory, - filterIdentifiers: pinnedDocIdentifiers, - }); - - // Why does contextTexts get all the info, but sources only get current search? - // This is to give the ability of the LLM to "comprehend" a contextual response without - // populating the Citations under a response with documents the user "thinks" are irrelevant - // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses. - // If a past citation was used to answer the question - that is visible in the history so it logically makes sense - // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt. - // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate. - contextTexts = [...contextTexts, ...filledSources.contextTexts]; - sources = [...sources, ...vectorSearchResults.sources]; - - // If in query mode and no context chunks are found from search, backfill, or pins - do not - // let the LLM try to hallucinate a response or use general knowledge and exit early - if (chatMode === "query" && contextTexts.length === 0) { - const textResponse = - workspace?.queryRefusalResponse ?? 
- "There is no relevant information in this workspace to answer your query."; - - await WorkspaceChats.new({ - workspaceId: workspace.id, - prompt: message, - response: { - text: textResponse, - sources: [], - type: chatMode, - }, - threadId: thread?.id || null, - include: false, - user, - }); - - return { - id: uuid, - type: "textResponse", - sources: [], - close: true, - error: null, - textResponse, - }; - } - - // Compress & Assemble message to ensure prompt passes token limit with room for response - // and build system messages based on inputs and history. - const messages = await LLMConnector.compressMessages( - { - systemPrompt: chatPrompt(workspace), - userPrompt: updatedMessage, - contextTexts, - chatHistory, - }, - rawHistory - ); - - // Send the text completion. - const textResponse = await LLMConnector.getChatCompletion(messages, { - temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp, - }); - - if (!textResponse) { - return { - id: uuid, - type: "abort", - textResponse: null, - sources: [], - close: true, - error: "No text completion could be completed with this input.", - }; - } - - const { chat } = await WorkspaceChats.new({ - workspaceId: workspace.id, - prompt: message, - response: { text: textResponse, sources, type: chatMode }, - threadId: thread?.id || null, - user, - }); - return { - id: uuid, - type: "textResponse", - close: true, - error: null, - chatId: chat.id, - textResponse, - sources, - }; -} - async function recentChatHistory({ user = null, workspace,