Refactor api endpoint chat handler to its own function (#2157)

Remove legacy `chatWithWorkspace` and clean up `index.js`.
Author: Timothy Carambat
Date: 2024-08-21 14:47:06 -07:00 (committed by GitHub)
parent f7756d4758
commit 1f96b837b3
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
5 changed files with 514 additions and 231 deletions
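
The net effect of the refactor, condensed here for orientation: the developer API endpoints stop calling the legacy positional helper and instead call the new object-style handler. This is an illustrative sketch of the diffs that follow, not additional code from the commit; the require path is shortened for illustration.

// Before (legacy helper, deleted from the chats index module in this commit):
// const result = await chatWithWorkspace(workspace, message, mode, user, thread);

// After (new module added in this commit):
const { ApiChatHandler } = require("./utils/chats/apiChatHandler"); // path abbreviated for illustration
const result = await ApiChatHandler.chatSync({ workspace, message, mode, user: null, thread: null });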

View File

@@ -43,6 +43,7 @@
     "searxng",
     "Serper",
     "Serply",
+    "streamable",
     "textgenwebui",
     "togetherai",
     "Unembed",

View File

@@ -4,19 +4,16 @@ const { Telemetry } = require("../../../models/telemetry");
 const { DocumentVectors } = require("../../../models/vectors");
 const { Workspace } = require("../../../models/workspace");
 const { WorkspaceChats } = require("../../../models/workspaceChats");
-const { chatWithWorkspace } = require("../../../utils/chats");
 const { getVectorDbClass } = require("../../../utils/helpers");
 const { multiUserMode, reqBody } = require("../../../utils/http");
 const { validApiKey } = require("../../../utils/middleware/validApiKey");
-const {
-  streamChatWithWorkspace,
-  VALID_CHAT_MODE,
-} = require("../../../utils/chats/stream");
+const { VALID_CHAT_MODE } = require("../../../utils/chats/stream");
 const { EventLogs } = require("../../../models/eventLogs");
 const {
   convertToChatHistory,
   writeResponseChunk,
 } = require("../../../utils/helpers/chat/responses");
+const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");

 function apiWorkspaceEndpoints(app) {
   if (!app) return;
@@ -584,7 +581,7 @@
       try {
         const { slug } = request.params;
         const { message, mode = "query" } = reqBody(request);
-        const workspace = await Workspace.get({ slug });
+        const workspace = await Workspace.get({ slug: String(slug) });

         if (!workspace) {
           response.status(400).json({
@@ -612,9 +609,17 @@
           return;
         }

-        const result = await chatWithWorkspace(workspace, message, mode);
+        const result = await ApiChatHandler.chatSync({
+          workspace,
+          message,
+          mode,
+          user: null,
+          thread: null,
+        });
+
         await Telemetry.sendTelemetry("sent_chat", {
-          LLMSelection: process.env.LLM_PROVIDER || "openai",
+          LLMSelection:
+            workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai",
           Embedder: process.env.EMBEDDING_ENGINE || "inherit",
           VectorDbSelection: process.env.VECTOR_DB || "lancedb",
           TTSSelection: process.env.TTS_PROVIDER || "native",
@@ -623,7 +628,7 @@
           workspaceName: workspace?.name,
           chatModel: workspace?.chatModel || "System Default",
         });
-        response.status(200).json({ ...result });
+        return response.status(200).json({ ...result });
       } catch (e) {
         console.error(e.message, e);
         response.status(500).json({
@@ -702,7 +707,7 @@
       try {
         const { slug } = request.params;
         const { message, mode = "query" } = reqBody(request);
-        const workspace = await Workspace.get({ slug });
+        const workspace = await Workspace.get({ slug: String(slug) });

         if (!workspace) {
           response.status(400).json({
@@ -736,9 +741,17 @@
         response.setHeader("Connection", "keep-alive");
         response.flushHeaders();

-        await streamChatWithWorkspace(response, workspace, message, mode);
+        await ApiChatHandler.streamChat({
+          response,
+          workspace,
+          message,
+          mode,
+          user: null,
+          thread: null,
+        });
         await Telemetry.sendTelemetry("sent_chat", {
-          LLMSelection: process.env.LLM_PROVIDER || "openai",
+          LLMSelection:
+            workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai",
           Embedder: process.env.EMBEDDING_ENGINE || "inherit",
           VectorDbSelection: process.env.VECTOR_DB || "lancedb",
           TTSSelection: process.env.TTS_PROVIDER || "native",
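
For context, a sketch of how a client might exercise the synchronous chat endpoint wired above. The base URL, route path, API-key env var name, and Authorization header are assumptions based on the surrounding endpoint file and are not visible in these hunks; the response shape mirrors the ResponseObject returned by ApiChatHandler.chatSync further down.

// Hypothetical client call; baseUrl, apiKey, and the route path are placeholders.
const baseUrl = "http://localhost:3001/api"; // assumption: server base URL
const apiKey = process.env.ANYTHINGLLM_API_KEY; // assumption: developer API key location
const slug = "my-workspace";

const res = await fetch(`${baseUrl}/v1/workspace/${slug}/chat`, {
  method: "POST",
  headers: {
    Authorization: `Bearer ${apiKey}`, // assumption: bearer auth checked by validApiKey
    "Content-Type": "application/json",
  },
  body: JSON.stringify({ message: "What is in this workspace?", mode: "query" }),
});
const result = await res.json(); // { id, type, textResponse, sources, close, error, ... }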

View File

@@ -3,7 +3,6 @@ const { WorkspaceThread } = require("../../../models/workspaceThread");
 const { Workspace } = require("../../../models/workspace");
 const { validApiKey } = require("../../../utils/middleware/validApiKey");
 const { reqBody, multiUserMode } = require("../../../utils/http");
-const { chatWithWorkspace } = require("../../../utils/chats");
 const {
   streamChatWithWorkspace,
   VALID_CHAT_MODE,
@@ -16,6 +15,7 @@ const {
 } = require("../../../utils/helpers/chat/responses");
 const { WorkspaceChats } = require("../../../models/workspaceChats");
 const { User } = require("../../../models/user");
+const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");

 function apiWorkspaceThreadEndpoints(app) {
   if (!app) return;
@@ -405,13 +405,13 @@
         }

         const user = userId ? await User.get({ id: Number(userId) }) : null;
-        const result = await chatWithWorkspace(
+        const result = await ApiChatHandler.chatSync({
           workspace,
           message,
           mode,
           user,
-          thread
-        );
+          thread,
+        });
         await Telemetry.sendTelemetry("sent_chat", {
           LLMSelection: process.env.LLM_PROVIDER || "openai",
           Embedder: process.env.EMBEDDING_ENGINE || "inherit",
@@ -556,14 +556,14 @@
         response.setHeader("Connection", "keep-alive");
         response.flushHeaders();

-        await streamChatWithWorkspace(
+        await ApiChatHandler.streamChat({
           response,
           workspace,
           message,
           mode,
           user,
-          thread
-        );
+          thread,
+        });
         await Telemetry.sendTelemetry("sent_chat", {
           LLMSelection: process.env.LLM_PROVIDER || "openai",
           Embedder: process.env.EMBEDDING_ENGINE || "inherit",
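
Condensed view of the thread-scoped call path shown above: unlike the workspace-level endpoints, which pass user: null and thread: null, the thread endpoints resolve the optional user record and forward both it and the thread to the same handler. Names mirror the hunks above; this is a sketch, not additional commit code.

// Inside the thread chat route: resolve the optional user, then delegate.
const user = userId ? await User.get({ id: Number(userId) }) : null;
const result = await ApiChatHandler.chatSync({
  workspace, // workspace that owns the thread
  message,
  mode,
  user,   // null when no userId was supplied in the request body
  thread, // workspace_threads record looked up earlier in the route
});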

View File

@@ -0,0 +1,481 @@
const { v4: uuidv4 } = require("uuid");
const { DocumentManager } = require("../DocumentManager");
const { WorkspaceChats } = require("../../models/workspaceChats");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses");
const { chatPrompt, sourceIdentifier, recentChatHistory } = require("./index");
/**
* @typedef ResponseObject
* @property {string} id - uuid of response
* @property {string} type - Type of response
* @property {string|null} textResponse - full text response
* @property {object[]} sources
* @property {boolean} close
* @property {string|null} error
*/
/**
* Handle synchronous chats with your workspace via the developer API endpoint
* @param {{
* workspace: import("@prisma/client").workspaces,
* message:string,
* mode: "chat"|"query",
* user: import("@prisma/client").users|null,
* thread: import("@prisma/client").workspace_threads|null,
* }} parameters
* @returns {Promise<ResponseObject>}
*/
async function chatSync({
workspace,
message = null,
mode = "chat",
user = null,
thread = null,
}) {
const uuid = uuidv4();
const chatMode = mode ?? "chat";
const LLMConnector = getLLMProvider({
provider: workspace?.chatProvider,
model: workspace?.chatModel,
});
const VectorDb = getVectorDbClass();
const messageLimit = workspace?.openAiHistory || 20;
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
// User is trying to query-mode chat a workspace that has no data in it - so
// we should exit early as no information can be found under these conditions.
if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
const textResponse =
workspace?.queryRefusalResponse ??
"There is no relevant information in this workspace to answer your query.";
await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: String(message),
response: {
text: textResponse,
sources: [],
type: chatMode,
},
include: false,
});
return {
id: uuid,
type: "textResponse",
sources: [],
close: true,
error: null,
textResponse,
};
}
// If we are here we know that we are in a workspace that is:
// 1. Chatting in "chat" mode and may or may _not_ have embeddings
// 2. Chatting in "query" mode and has at least 1 embedding
let contextTexts = [];
let sources = [];
let pinnedDocIdentifiers = [];
const { rawHistory, chatHistory } = await recentChatHistory({
user,
workspace,
thread,
messageLimit,
chatMode,
});
await new DocumentManager({
workspace,
maxTokens: LLMConnector.promptWindowLimit(),
})
.pinnedDocs()
.then((pinnedDocs) => {
pinnedDocs.forEach((doc) => {
const { pageContent, ...metadata } = doc;
pinnedDocIdentifiers.push(sourceIdentifier(doc));
contextTexts.push(doc.pageContent);
sources.push({
text:
pageContent.slice(0, 1_000) +
"...continued on in source document...",
...metadata,
});
});
});
const vectorSearchResults =
embeddingsCount !== 0
? await VectorDb.performSimilaritySearch({
namespace: workspace.slug,
input: message,
LLMConnector,
similarityThreshold: workspace?.similarityThreshold,
topN: workspace?.topN,
filterIdentifiers: pinnedDocIdentifiers,
})
: {
contextTexts: [],
sources: [],
message: null,
};
// Failed similarity search if it was run at all and failed.
if (!!vectorSearchResults.message) {
return {
id: uuid,
type: "abort",
textResponse: null,
sources: [],
close: true,
error: vectorSearchResults.message,
};
}
const { fillSourceWindow } = require("../helpers/chat");
const filledSources = fillSourceWindow({
nDocs: workspace?.topN || 4,
searchResults: vectorSearchResults.sources,
history: rawHistory,
filterIdentifiers: pinnedDocIdentifiers,
});
// Why does contextTexts get all the info, but sources only get current search?
// This is to give the ability of the LLM to "comprehend" a contextual response without
// populating the Citations under a response with documents the user "thinks" are irrelevant
// due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
// If a past citation was used to answer the question - that is visible in the history so it logically makes sense
// and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
// TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate.
contextTexts = [...contextTexts, ...filledSources.contextTexts];
sources = [...sources, ...vectorSearchResults.sources];
// If in query mode and no context chunks are found from search, backfill, or pins - do not
// let the LLM try to hallucinate a response or use general knowledge and exit early
if (chatMode === "query" && contextTexts.length === 0) {
const textResponse =
workspace?.queryRefusalResponse ??
"There is no relevant information in this workspace to answer your query.";
await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: {
text: textResponse,
sources: [],
type: chatMode,
},
threadId: thread?.id || null,
include: false,
user,
});
return {
id: uuid,
type: "textResponse",
sources: [],
close: true,
error: null,
textResponse,
};
}
// Compress & Assemble message to ensure prompt passes token limit with room for response
// and build system messages based on inputs and history.
const messages = await LLMConnector.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: message,
contextTexts,
chatHistory,
},
rawHistory
);
// Send the text completion.
const textResponse = await LLMConnector.getChatCompletion(messages, {
temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
});
if (!textResponse) {
return {
id: uuid,
type: "abort",
textResponse: null,
sources: [],
close: true,
error: "No text completion could be completed with this input.",
};
}
const { chat } = await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: { text: textResponse, sources, type: chatMode },
threadId: thread?.id || null,
user,
});
return {
id: uuid,
type: "textResponse",
close: true,
error: null,
chatId: chat.id,
textResponse,
sources,
};
}
/**
* Handle streamable HTTP chunks for chats with your workspace via the developer API endpoint
* @param {{
* response: import("express").Response,
* workspace: import("@prisma/client").workspaces,
* message:string,
* mode: "chat"|"query",
* user: import("@prisma/client").users|null,
* thread: import("@prisma/client").workspace_threads|null,
* }} parameters
* @returns {Promise<VoidFunction>}
*/
async function streamChat({
response,
workspace,
message = null,
mode = "chat",
user = null,
thread = null,
}) {
const uuid = uuidv4();
const chatMode = mode ?? "chat";
const LLMConnector = getLLMProvider({
provider: workspace?.chatProvider,
model: workspace?.chatModel,
});
const VectorDb = getVectorDbClass();
const messageLimit = workspace?.openAiHistory || 20;
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
// User is trying to query-mode chat a workspace that has no data in it - so
// we should exit early as no information can be found under these conditions.
if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
const textResponse =
workspace?.queryRefusalResponse ??
"There is no relevant information in this workspace to answer your query.";
writeResponseChunk(response, {
id: uuid,
type: "textResponse",
textResponse,
sources: [],
attachments: [],
close: true,
error: null,
});
await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: {
text: textResponse,
sources: [],
type: chatMode,
attachments: [],
},
threadId: thread?.id || null,
include: false,
user,
});
return;
}
// If we are here we know that we are in a workspace that is:
// 1. Chatting in "chat" mode and may or may _not_ have embeddings
// 2. Chatting in "query" mode and has at least 1 embedding
let completeText;
let contextTexts = [];
let sources = [];
let pinnedDocIdentifiers = [];
const { rawHistory, chatHistory } = await recentChatHistory({
user,
workspace,
thread,
messageLimit,
});
// Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
// as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
// However we limit the maximum of appended context to 80% of its overall size, mostly because if it expands beyond this
// it will undergo prompt compression anyway to make it work. If there is so much pinned that the context here is bigger than
// what the model can support - it would get compressed anyway and that really is not the point of pinning. It is really best
// suited for high-context models.
await new DocumentManager({
workspace,
maxTokens: LLMConnector.promptWindowLimit(),
})
.pinnedDocs()
.then((pinnedDocs) => {
pinnedDocs.forEach((doc) => {
const { pageContent, ...metadata } = doc;
pinnedDocIdentifiers.push(sourceIdentifier(doc));
contextTexts.push(doc.pageContent);
sources.push({
text:
pageContent.slice(0, 1_000) +
"...continued on in source document...",
...metadata,
});
});
});
const vectorSearchResults =
embeddingsCount !== 0
? await VectorDb.performSimilaritySearch({
namespace: workspace.slug,
input: message,
LLMConnector,
similarityThreshold: workspace?.similarityThreshold,
topN: workspace?.topN,
filterIdentifiers: pinnedDocIdentifiers,
})
: {
contextTexts: [],
sources: [],
message: null,
};
// Failed similarity search if it was run at all and failed.
if (!!vectorSearchResults.message) {
writeResponseChunk(response, {
id: uuid,
type: "abort",
textResponse: null,
sources: [],
close: true,
error: vectorSearchResults.message,
});
return;
}
const { fillSourceWindow } = require("../helpers/chat");
const filledSources = fillSourceWindow({
nDocs: workspace?.topN || 4,
searchResults: vectorSearchResults.sources,
history: rawHistory,
filterIdentifiers: pinnedDocIdentifiers,
});
// Why does contextTexts get all the info, but sources only get current search?
// This is to give the ability of the LLM to "comprehend" a contextual response without
// populating the Citations under a response with documents the user "thinks" are irrelevant
// due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
// If a past citation was used to answer the question - that is visible in the history so it logically makes sense
// and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
// TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate.
contextTexts = [...contextTexts, ...filledSources.contextTexts];
sources = [...sources, ...vectorSearchResults.sources];
// If in query mode and no context chunks are found from search, backfill, or pins - do not
// let the LLM try to hallucinate a response or use general knowledge and exit early
if (chatMode === "query" && contextTexts.length === 0) {
const textResponse =
workspace?.queryRefusalResponse ??
"There is no relevant information in this workspace to answer your query.";
writeResponseChunk(response, {
id: uuid,
type: "textResponse",
textResponse,
sources: [],
close: true,
error: null,
});
await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: {
text: textResponse,
sources: [],
type: chatMode,
attachments: [],
},
threadId: thread?.id || null,
include: false,
user,
});
return;
}
// Compress & Assemble message to ensure prompt passes token limit with room for response
// and build system messages based on inputs and history.
const messages = await LLMConnector.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: message,
contextTexts,
chatHistory,
},
rawHistory
);
// If streaming is not explicitly enabled for connector
// we do regular waiting of a response and send a single chunk.
if (LLMConnector.streamingEnabled() !== true) {
console.log(
`\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
);
completeText = await LLMConnector.getChatCompletion(messages, {
temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
});
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: completeText,
close: true,
error: false,
});
} else {
const stream = await LLMConnector.streamGetChatCompletion(messages, {
temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
});
completeText = await LLMConnector.handleStream(response, stream, {
uuid,
sources,
});
}
if (completeText?.length > 0) {
const { chat } = await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: { text: completeText, sources, type: chatMode },
threadId: thread?.id || null,
user,
});
writeResponseChunk(response, {
uuid,
type: "finalizeResponseStream",
close: true,
error: false,
chatId: chat.id,
});
return;
}
writeResponseChunk(response, {
uuid,
type: "finalizeResponseStream",
close: true,
error: false,
});
return;
}
module.exports.ApiChatHandler = {
chatSync,
streamChat,
};
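
A brief consumption sketch for the module above, mirroring how the endpoint files use it: chatSync resolves to the ResponseObject documented at the top of the file, while streamChat writes "textResponseChunk" / "finalizeResponseStream" chunks to the Express response and resolves with no value once the stream is finalized. The require path and message text are illustrative.

const { ApiChatHandler } = require("../../utils/chats/apiChatHandler"); // illustrative path, relative to a hypothetical caller

// Synchronous flavor: returns the full ResponseObject.
const result = await ApiChatHandler.chatSync({
  workspace,
  message: "Summarize the pinned documents.",
  mode: "chat",
  user: null,
  thread: null,
});
if (result.error) throw new Error(result.error);
console.log(result.textResponse, result.sources.length);

// Streaming flavor: chunks are written to `response`; the promise carries no payload.
await ApiChatHandler.streamChat({
  response, // an Express response with streaming headers already set by the route
  workspace,
  message: "Summarize the pinned documents.",
  mode: "chat",
  user: null,
  thread: null,
});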

View File

@@ -1,9 +1,7 @@
 const { v4: uuidv4 } = require("uuid");
 const { WorkspaceChats } = require("../../models/workspaceChats");
 const { resetMemory } = require("./commands/reset");
-const { getVectorDbClass, getLLMProvider } = require("../helpers");
 const { convertToPromptHistory } = require("../helpers/chat/responses");
-const { DocumentManager } = require("../DocumentManager");
 const { SlashCommandPresets } = require("../../models/slashCommandsPresets");

 const VALID_COMMANDS = {
@@ -34,216 +32,6 @@ async function grepCommand(message, user = null) {
   return updatedMessage;
 }

async function chatWithWorkspace(
workspace,
message,
chatMode = "chat",
user = null,
thread = null
) {
const uuid = uuidv4();
const updatedMessage = await grepCommand(message, user);
if (Object.keys(VALID_COMMANDS).includes(updatedMessage)) {
return await VALID_COMMANDS[updatedMessage](workspace, message, uuid, user);
}
const LLMConnector = getLLMProvider({
provider: workspace?.chatProvider,
model: workspace?.chatModel,
});
const VectorDb = getVectorDbClass();
const messageLimit = workspace?.openAiHistory || 20;
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
// User is trying to query-mode chat a workspace that has no data in it - so
// we should exit early as no information can be found under these conditions.
if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
const textResponse =
workspace?.queryRefusalResponse ??
"There is no relevant information in this workspace to answer your query.";
await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: {
text: textResponse,
sources: [],
type: chatMode,
},
threadId: thread?.id || null,
include: false,
user,
});
return {
id: uuid,
type: "textResponse",
sources: [],
close: true,
error: null,
textResponse,
};
}
// If we are here we know that we are in a workspace that is:
// 1. Chatting in "chat" mode and may or may _not_ have embeddings
// 2. Chatting in "query" mode and has at least 1 embedding
let contextTexts = [];
let sources = [];
let pinnedDocIdentifiers = [];
const { rawHistory, chatHistory } = await recentChatHistory({
user,
workspace,
thread,
messageLimit,
chatMode,
});
// See stream.js comment for more information on this implementation.
await new DocumentManager({
workspace,
maxTokens: LLMConnector.promptWindowLimit(),
})
.pinnedDocs()
.then((pinnedDocs) => {
pinnedDocs.forEach((doc) => {
const { pageContent, ...metadata } = doc;
pinnedDocIdentifiers.push(sourceIdentifier(doc));
contextTexts.push(doc.pageContent);
sources.push({
text:
pageContent.slice(0, 1_000) +
"...continued on in source document...",
...metadata,
});
});
});
const vectorSearchResults =
embeddingsCount !== 0
? await VectorDb.performSimilaritySearch({
namespace: workspace.slug,
input: message,
LLMConnector,
similarityThreshold: workspace?.similarityThreshold,
topN: workspace?.topN,
filterIdentifiers: pinnedDocIdentifiers,
})
: {
contextTexts: [],
sources: [],
message: null,
};
// Failed similarity search if it was run at all and failed.
if (!!vectorSearchResults.message) {
return {
id: uuid,
type: "abort",
textResponse: null,
sources: [],
close: true,
error: vectorSearchResults.message,
};
}
const { fillSourceWindow } = require("../helpers/chat");
const filledSources = fillSourceWindow({
nDocs: workspace?.topN || 4,
searchResults: vectorSearchResults.sources,
history: rawHistory,
filterIdentifiers: pinnedDocIdentifiers,
});
// Why does contextTexts get all the info, but sources only get current search?
// This is to give the ability of the LLM to "comprehend" a contextual response without
// populating the Citations under a response with documents the user "thinks" are irrelevant
// due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
// If a past citation was used to answer the question - that is visible in the history so it logically makes sense
// and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
// TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate.
contextTexts = [...contextTexts, ...filledSources.contextTexts];
sources = [...sources, ...vectorSearchResults.sources];
// If in query mode and no context chunks are found from search, backfill, or pins - do not
// let the LLM try to hallucinate a response or use general knowledge and exit early
if (chatMode === "query" && contextTexts.length === 0) {
const textResponse =
workspace?.queryRefusalResponse ??
"There is no relevant information in this workspace to answer your query.";
await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: {
text: textResponse,
sources: [],
type: chatMode,
},
threadId: thread?.id || null,
include: false,
user,
});
return {
id: uuid,
type: "textResponse",
sources: [],
close: true,
error: null,
textResponse,
};
}
// Compress & Assemble message to ensure prompt passes token limit with room for response
// and build system messages based on inputs and history.
const messages = await LLMConnector.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: updatedMessage,
contextTexts,
chatHistory,
},
rawHistory
);
// Send the text completion.
const textResponse = await LLMConnector.getChatCompletion(messages, {
temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
});
if (!textResponse) {
return {
id: uuid,
type: "abort",
textResponse: null,
sources: [],
close: true,
error: "No text completion could be completed with this input.",
};
}
const { chat } = await WorkspaceChats.new({
workspaceId: workspace.id,
prompt: message,
response: { text: textResponse, sources, type: chatMode },
threadId: thread?.id || null,
user,
});
return {
id: uuid,
type: "textResponse",
close: true,
error: null,
chatId: chat.id,
textResponse,
sources,
};
}

 async function recentChatHistory({
   user = null,
   workspace,