const { v4: uuidv4 } = require("uuid");
const { DocumentManager } = require("../DocumentManager");
const { WorkspaceChats } = require("../../models/workspaceChats");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses");
const { chatPrompt, sourceIdentifier, recentChatHistory } = require("./index");
const {
  EphemeralAgentHandler,
  EphemeralEventListener,
} = require("../agents/ephemeral");
const { Telemetry } = require("../../models/telemetry");

/**
 * @typedef ResponseObject
 * @property {string} id - uuid of response
 * @property {string} type - Type of response
 * @property {string|null} textResponse - full text response
 * @property {object[]} sources
 * @property {boolean} close
 * @property {string|null} error
 */
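
// Example shape (illustrative only) of a successful ResponseObject returned by chatSync:
// {
//   id: "uuid-v4",
//   type: "textResponse",
//   textResponse: "The answer to the prompt...",
//   sources: [{ text: "...", ...documentMetadata }],
//   close: true,
//   error: null,
// }
// Successful non-agent responses also include `chatId` for the saved workspace chat.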

/**
 * Handle synchronous chats with your workspace via the developer API endpoint
 * @param {{
 *  workspace: import("@prisma/client").workspaces,
 *  message: string,
 *  mode: "chat"|"query",
 *  user: import("@prisma/client").users|null,
 *  thread: import("@prisma/client").workspace_threads|null,
 *  sessionId: string|null,
 * }} parameters
 * @returns {Promise<ResponseObject>}
 */
async function chatSync({
  workspace,
  message = null,
  mode = "chat",
  user = null,
  thread = null,
  sessionId = null,
}) {
  const uuid = uuidv4();
  const chatMode = mode ?? "chat";

  if (EphemeralAgentHandler.isAgentInvocation({ message })) {
    await Telemetry.sendTelemetry("agent_chat_started");

    // Initialize the EphemeralAgentHandler to handle non-continuous
    // conversations with agents since this is over REST.
    const agentHandler = new EphemeralAgentHandler({
      uuid,
      workspace,
      prompt: message,
      userId: user?.id || null,
      threadId: thread?.id || null,
      sessionId,
    });

    // Establish an event listener that emulates websocket calls
    // in Aibitat so that we can keep the same interface in Aibitat
    // but use HTTP.
    const eventListener = new EphemeralEventListener();
    await agentHandler.init();
    await agentHandler.createAIbitat({ handler: eventListener });
    agentHandler.startAgentCluster();

    // The cluster has started and now we wait for the close event since
    // this is a synchronous call for an agent, so we return everything at once.
    // After this, we conclude the call as we normally do.
    return await eventListener
      .waitForClose()
      .then(async ({ thoughts, textResponse }) => {
        await WorkspaceChats.new({
          workspaceId: workspace.id,
          prompt: String(message),
          response: {
            text: textResponse,
            sources: [],
            type: chatMode,
            thoughts,
          },
          include: false,
          apiSessionId: sessionId,
        });
        return {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: null,
          textResponse,
          thoughts,
        };
      });
  }

  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const messageLimit = workspace?.openAiHistory || 20;
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";

    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(message),
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
      apiSessionId: sessionId,
    });

    return {
      id: uuid,
      type: "textResponse",
      sources: [],
      close: true,
      error: null,
      textResponse,
    };
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  let contextTexts = [];
  let sources = [];
  let pinnedDocIdentifiers = [];
  const { rawHistory, chatHistory } = await recentChatHistory({
    user,
    workspace,
    thread,
    messageLimit,
    apiSessionId: sessionId,
  });

  await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  })
    .pinnedDocs()
    .then((pinnedDocs) => {
      pinnedDocs.forEach((doc) => {
        const { pageContent, ...metadata } = doc;
        pinnedDocIdentifiers.push(sourceIdentifier(doc));
        contextTexts.push(doc.pageContent);
        sources.push({
          text:
            pageContent.slice(0, 1_000) +
            "...continued on in source document...",
          ...metadata,
        });
      });
    });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: message,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // Abort if the similarity search was run and returned an error.
  if (!!vectorSearchResults.message) {
    return {
      id: uuid,
      type: "abort",
      textResponse: null,
      sources: [],
      close: true,
      error: vectorSearchResults.message,
    };
  }

  const { fillSourceWindow } = require("../helpers/chat");
  const filledSources = fillSourceWindow({
    nDocs: workspace?.topN || 4,
    searchResults: vectorSearchResults.sources,
    history: rawHistory,
    filterIdentifiers: pinnedDocIdentifiers,
  });

  // Why does contextTexts get all the info, but sources only get the current search?
  // This gives the LLM the ability to "comprehend" a contextual response without
  // populating the Citations under a response with documents the user "thinks" are irrelevant
  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
  // If a past citation was used to answer the question - that is visible in the history, so it logically makes sense
  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
  contextTexts = [...contextTexts, ...filledSources.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks are found from search, backfill, or pins - do not
  // let the LLM try to hallucinate a response or use general knowledge; exit early.
  if (chatMode === "query" && contextTexts.length === 0) {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";

    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: message,
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      threadId: thread?.id || null,
      include: false,
      apiSessionId: sessionId,
      user,
    });

    return {
      id: uuid,
      type: "textResponse",
      sources: [],
      close: true,
      error: null,
      textResponse,
    };
  }

  // Compress & assemble messages to ensure the prompt passes the token limit with room for a response,
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages(
    {
      systemPrompt: chatPrompt(workspace),
      userPrompt: message,
      contextTexts,
      chatHistory,
    },
    rawHistory
  );

  // Send the text completion.
  const textResponse = await LLMConnector.getChatCompletion(messages, {
    temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  });

  if (!textResponse) {
    return {
      id: uuid,
      type: "abort",
      textResponse: null,
      sources: [],
      close: true,
      error: "No text completion could be completed with this input.",
    };
  }

  const { chat } = await WorkspaceChats.new({
    workspaceId: workspace.id,
    prompt: message,
    response: { text: textResponse, sources, type: chatMode },
    threadId: thread?.id || null,
    apiSessionId: sessionId,
    user,
  });

  return {
    id: uuid,
    type: "textResponse",
    close: true,
    error: null,
    chatId: chat.id,
    textResponse,
    sources,
  };
}

/**
 * Handle streamable HTTP chunks for chats with your workspace via the developer API endpoint
 * @param {{
 *  response: import("express").Response,
 *  workspace: import("@prisma/client").workspaces,
 *  message: string,
 *  mode: "chat"|"query",
 *  user: import("@prisma/client").users|null,
 *  thread: import("@prisma/client").workspace_threads|null,
 *  sessionId: string|null,
 * }} parameters
 * @returns {Promise<VoidFunction>}
 */
async function streamChat({
  response,
  workspace,
  message = null,
  mode = "chat",
  user = null,
  thread = null,
  sessionId = null,
}) {
  const uuid = uuidv4();
  const chatMode = mode ?? "chat";
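
  // Note: chunks written directly by this function via writeResponseChunk carry a
  // `type` of "textResponse", "textResponseChunk", "abort", or "finalizeResponseStream";
  // streaming providers emit their own token chunks inside LLMConnector.handleStream.
  // API consumers can switch on `type` to assemble the final answer or surface errors.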

  if (EphemeralAgentHandler.isAgentInvocation({ message })) {
    await Telemetry.sendTelemetry("agent_chat_started");

    // Initialize the EphemeralAgentHandler to handle non-continuous
    // conversations with agents since this is over REST.
    const agentHandler = new EphemeralAgentHandler({
      uuid,
      workspace,
      prompt: message,
      userId: user?.id || null,
      threadId: thread?.id || null,
      sessionId,
    });

    // Establish an event listener that emulates websocket calls
    // in Aibitat so that we can keep the same interface in Aibitat
    // but use HTTP.
    const eventListener = new EphemeralEventListener();
    await agentHandler.init();
    await agentHandler.createAIbitat({ handler: eventListener });
    agentHandler.startAgentCluster();

    // The cluster has started and now we wait for the close event while
    // streaming back any results we get from agents as they come in.
    return eventListener
      .streamAgentEvents(response, uuid)
      .then(async ({ thoughts, textResponse }) => {
        await WorkspaceChats.new({
          workspaceId: workspace.id,
          prompt: String(message),
          response: {
            text: textResponse,
            sources: [],
            type: chatMode,
            thoughts,
          },
          include: false,
          apiSessionId: sessionId,
        });
        writeResponseChunk(response, {
          uuid,
          type: "finalizeResponseStream",
          textResponse,
          thoughts,
          close: true,
          error: false,
        });
      });
  }

  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const messageLimit = workspace?.openAiHistory || 20;
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    writeResponseChunk(response, {
      id: uuid,
      type: "textResponse",
      textResponse,
      sources: [],
      attachments: [],
      close: true,
      error: null,
    });
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: message,
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
        attachments: [],
      },
      threadId: thread?.id || null,
      apiSessionId: sessionId,
      include: false,
      user,
    });
    return;
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  let completeText;
  let contextTexts = [];
  let sources = [];
  let pinnedDocIdentifiers = [];
  const { rawHistory, chatHistory } = await recentChatHistory({
    user,
    workspace,
    thread,
    messageLimit,
    apiSessionId: sessionId,
  });

  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
  // as pinning is a supplemental tool, but it should be used with caution since it can easily blow up a context window.
  // However, we limit the maximum appended context to 80% of its overall size, mostly because if it expands beyond this
  // it will undergo prompt compression anyway to make it work. If there is so much pinned that the context here is bigger than
  // what the model can support - it would get compressed anyway, and that really is not the point of pinning. It is really best
  // suited for high-context models.
  await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  })
    .pinnedDocs()
    .then((pinnedDocs) => {
      pinnedDocs.forEach((doc) => {
        const { pageContent, ...metadata } = doc;
        pinnedDocIdentifiers.push(sourceIdentifier(doc));
        contextTexts.push(doc.pageContent);
        sources.push({
          text:
            pageContent.slice(0, 1_000) +
            "...continued on in source document...",
          ...metadata,
        });
      });
    });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: message,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // Abort if the similarity search was run and returned an error.
  if (!!vectorSearchResults.message) {
    writeResponseChunk(response, {
      id: uuid,
      type: "abort",
      textResponse: null,
      sources: [],
      close: true,
      error: vectorSearchResults.message,
    });
    return;
  }

  const { fillSourceWindow } = require("../helpers/chat");
  const filledSources = fillSourceWindow({
    nDocs: workspace?.topN || 4,
    searchResults: vectorSearchResults.sources,
    history: rawHistory,
    filterIdentifiers: pinnedDocIdentifiers,
  });

  // Why does contextTexts get all the info, but sources only get the current search?
  // This gives the LLM the ability to "comprehend" a contextual response without
  // populating the Citations under a response with documents the user "thinks" are irrelevant
  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
  // If a past citation was used to answer the question - that is visible in the history, so it logically makes sense
  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keeping answers highly accurate.
  contextTexts = [...contextTexts, ...filledSources.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks are found from search, backfill, or pins - do not
  // let the LLM try to hallucinate a response or use general knowledge; exit early.
  if (chatMode === "query" && contextTexts.length === 0) {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    writeResponseChunk(response, {
      id: uuid,
      type: "textResponse",
      textResponse,
      sources: [],
      close: true,
      error: null,
    });

    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: message,
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
        attachments: [],
      },
      threadId: thread?.id || null,
      apiSessionId: sessionId,
      include: false,
      user,
    });
    return;
  }

  // Compress & assemble messages to ensure the prompt passes the token limit with room for a response,
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages(
    {
      systemPrompt: chatPrompt(workspace),
      userPrompt: message,
      contextTexts,
      chatHistory,
    },
    rawHistory
  );

  // If streaming is not explicitly enabled for the connector,
  // we do a regular await of the response and send a single chunk.
  if (LLMConnector.streamingEnabled() !== true) {
    console.log(
      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
    );
    completeText = await LLMConnector.getChatCompletion(messages, {
      temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
    });
    writeResponseChunk(response, {
      uuid,
      sources,
      type: "textResponseChunk",
      textResponse: completeText,
      close: true,
      error: false,
    });
  } else {
    const stream = await LLMConnector.streamGetChatCompletion(messages, {
      temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
    });
    completeText = await LLMConnector.handleStream(response, stream, {
      uuid,
      sources,
    });
  }

  if (completeText?.length > 0) {
    const { chat } = await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: message,
      response: { text: completeText, sources, type: chatMode },
      threadId: thread?.id || null,
      apiSessionId: sessionId,
      user,
    });

    writeResponseChunk(response, {
      uuid,
      type: "finalizeResponseStream",
      close: true,
      error: false,
      chatId: chat.id,
    });
    return;
  }

  writeResponseChunk(response, {
    uuid,
    type: "finalizeResponseStream",
    close: true,
    error: false,
  });
  return;
}

module.exports.ApiChatHandler = {
  chatSync,
  streamChat,
};
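
// Illustrative usage sketch (not part of this module) - how a developer API route
// might call these handlers. The route path, require path, request parsing, and the
// `reqBody`/`Workspace` helpers are assumptions for the example, not exports of this file.
//
//   const { ApiChatHandler } = require("../utils/chats/apiChatHandler");
//
//   app.post("/v1/workspace/:slug/chat", async (request, response) => {
//     const { message, mode = "chat", sessionId = null } = reqBody(request);
//     const workspace = await Workspace.get({ slug: request.params.slug });
//     const result = await ApiChatHandler.chatSync({
//       workspace,
//       message,
//       mode,
//       user: null,
//       thread: null,
//       sessionId,
//     });
//     response.status(200).json(result);
//   });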