Refactor LLM chat backend (#717)

* refactor stream/chat/embed-stream into a single execution logic path so that it is easier to maintain and build upon

* no thread in sync chat since only the API uses it
* adjust import locations
Timothy Carambat 2024-02-14 12:32:07 -08:00 committed by GitHub
parent 161dc5f901
commit c59ab9da0a
20 changed files with 287 additions and 468 deletions
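For orientation before the per-file diffs: both the blocking path (chatWithWorkspace) and the streaming path (streamChatWithWorkspace) now share the same guard logic, which can be sketched as a couple of pure functions. This is an illustrative, self-contained sketch only; the helper names below are invented for the example and are not code from the commit.

// Illustrative sketch of the guard logic both chat paths now share (helper names are hypothetical).

// "query" mode against a workspace with no embeddings exits early with a canned reply.
function shouldExitEarly({ hasVectorizedSpace, embeddingsCount, chatMode }) {
  return (!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query";
}

// Similarity search only runs when at least one embedding exists; otherwise an
// empty result object is substituted so the rest of the path stays identical.
function shouldRunSimilaritySearch({ embeddingsCount }) {
  return embeddingsCount !== 0;
}

console.log(shouldExitEarly({ hasVectorizedSpace: false, embeddingsCount: 0, chatMode: "query" })); // true
console.log(shouldExitEarly({ hasVectorizedSpace: false, embeddingsCount: 0, chatMode: "chat" })); // false
console.log(shouldRunSimilaritySearch({ embeddingsCount: 12 })); // true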

View File

@@ -4,19 +4,19 @@ const { Telemetry } = require("../../../models/telemetry");
 const { DocumentVectors } = require("../../../models/vectors");
 const { Workspace } = require("../../../models/workspace");
 const { WorkspaceChats } = require("../../../models/workspaceChats");
-const {
-  convertToChatHistory,
-  chatWithWorkspace,
-} = require("../../../utils/chats");
+const { chatWithWorkspace } = require("../../../utils/chats");
 const { getVectorDbClass } = require("../../../utils/helpers");
 const { multiUserMode, reqBody } = require("../../../utils/http");
 const { validApiKey } = require("../../../utils/middleware/validApiKey");
 const {
   streamChatWithWorkspace,
-  writeResponseChunk,
   VALID_CHAT_MODE,
 } = require("../../../utils/chats/stream");
 const { EventLogs } = require("../../../models/eventLogs");
+const {
+  convertToChatHistory,
+  writeResponseChunk,
+} = require("../../../utils/helpers/chat/responses");

 function apiWorkspaceEndpoints(app) {
   if (!app) return;

View File

@@ -7,7 +7,6 @@ const { SystemSettings } = require("../models/systemSettings");
 const { Telemetry } = require("../models/telemetry");
 const {
   streamChatWithWorkspace,
-  writeResponseChunk,
   VALID_CHAT_MODE,
 } = require("../utils/chats/stream");
 const {
@@ -18,6 +17,7 @@ const { EventLogs } = require("../models/eventLogs");
 const {
   validWorkspaceAndThreadSlug,
 } = require("../utils/middleware/validWorkspace");
+const { writeResponseChunk } = require("../utils/helpers/chat/responses");

 function chatEndpoints(app) {
   if (!app) return;

View File

@@ -1,15 +1,17 @@
 const { v4: uuidv4 } = require("uuid");
 const { reqBody, multiUserMode } = require("../../utils/http");
 const { Telemetry } = require("../../models/telemetry");
-const { writeResponseChunk } = require("../../utils/chats/stream");
 const { streamChatWithForEmbed } = require("../../utils/chats/embed");
-const { convertToChatHistory } = require("../../utils/chats");
 const { EmbedChats } = require("../../models/embedChats");
 const {
   validEmbedConfig,
   canRespond,
   setConnectionMeta,
 } = require("../../utils/middleware/embedMiddleware");
+const {
+  convertToChatHistory,
+  writeResponseChunk,
+} = require("../../utils/helpers/chat/responses");

 function embeddedEndpoints(app) {
   if (!app) return;

View File

@@ -12,7 +12,7 @@ const {
   validWorkspaceAndThreadSlug,
 } = require("../utils/middleware/validWorkspace");
 const { WorkspaceChats } = require("../models/workspaceChats");
-const { convertToChatHistory } = require("../utils/chats");
+const { convertToChatHistory } = require("../utils/helpers/chat/responses");

 function workspaceThreadEndpoints(app) {
   if (!app) return;

View File

@@ -3,7 +3,6 @@ const { Workspace } = require("../models/workspace");
 const { Document } = require("../models/documents");
 const { DocumentVectors } = require("../models/vectors");
 const { WorkspaceChats } = require("../models/workspaceChats");
-const { convertToChatHistory } = require("../utils/chats");
 const { getVectorDbClass } = require("../utils/helpers");
 const { setupMulter } = require("../utils/files/multer");
 const {
@@ -22,6 +21,7 @@ const {
   WorkspaceSuggestedMessages,
 } = require("../models/workspacesSuggestedMessages");
 const { validWorkspaceSlug } = require("../utils/middleware/validWorkspace");
+const { convertToChatHistory } = require("../utils/helpers/chat/responses");
 const { handleUploads } = setupMulter();

 function workspaceEndpoints(app) {

View File

@@ -1,6 +1,6 @@
 const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../chats/stream");
+const { writeResponseChunk } = require("../../helpers/chat/responses");

 class AzureOpenAiLLM {
   constructor(embedder = null, _modelPreference = null) {

View File

@@ -1,5 +1,5 @@
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../chats/stream");
+const { writeResponseChunk } = require("../../helpers/chat/responses");

 class GeminiLLM {
   constructor(embedder = null, modelPreference = null) {

View File

@@ -1,7 +1,7 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../chats/stream");
+const { writeResponseChunk } = require("../../helpers/chat/responses");

 class HuggingFaceLLM {
   constructor(embedder = null, _modelPreference = null) {

View File

@@ -1,5 +1,5 @@
 const { chatPrompt } = require("../../chats");
-const { handleDefaultStreamResponse } = require("../../chats/stream");
+const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");

 // hybrid of openAi LLM chat completion for LMStudio
 class LMStudioLLM {

View File

@@ -1,5 +1,5 @@
 const { chatPrompt } = require("../../chats");
-const { handleDefaultStreamResponse } = require("../../chats/stream");
+const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");

 class LocalAiLLM {
   constructor(embedder = null, modelPreference = null) {

View File

@@ -1,5 +1,5 @@
 const { chatPrompt } = require("../../chats");
-const { handleDefaultStreamResponse } = require("../../chats/stream");
+const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");

 class MistralLLM {
   constructor(embedder = null, modelPreference = null) {

View File

@@ -2,7 +2,7 @@ const fs = require("fs");
 const path = require("path");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../chats/stream");
+const { writeResponseChunk } = require("../../helpers/chat/responses");

 // Docs: https://api.js.langchain.com/classes/chat_models_llama_cpp.ChatLlamaCpp.html
 const ChatLlamaCpp = (...args) =>

View File

@@ -1,6 +1,6 @@
 const { chatPrompt } = require("../../chats");
 const { StringOutputParser } = require("langchain/schema/output_parser");
-const { writeResponseChunk } = require("../../chats/stream");
+const { writeResponseChunk } = require("../../helpers/chat/responses");

 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 class OllamaAILLM {

View File

@@ -1,6 +1,6 @@
 const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
 const { chatPrompt } = require("../../chats");
-const { handleDefaultStreamResponse } = require("../../chats/stream");
+const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");

 class OpenAiLLM {
   constructor(embedder = null, modelPreference = null) {

View File

@@ -1,5 +1,5 @@
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../chats/stream");
+const { writeResponseChunk } = require("../../helpers/chat/responses");

 function togetherAiModels() {
   const { MODELS } = require("./models.js");

View File

@@ -1,8 +1,11 @@
 const { v4: uuidv4 } = require("uuid");
 const { getVectorDbClass, getLLMProvider } = require("../helpers");
-const { chatPrompt, convertToPromptHistory } = require(".");
-const { writeResponseChunk } = require("./stream");
+const { chatPrompt } = require("./index");
 const { EmbedChats } = require("../../models/embedChats");
+const {
+  convertToPromptHistory,
+  writeResponseChunk,
+} = require("../helpers/chat/responses");

 async function streamChatWithForEmbed(
   response,
@@ -44,30 +47,20 @@ async function streamChatWithForEmbed(
   const messageLimit = 20;
   const hasVectorizedSpace = await VectorDb.hasNamespace(embed.workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(embed.workspace.slug);
-  if (!hasVectorizedSpace || embeddingsCount === 0) {
-    if (chatMode === "query") {
-      writeResponseChunk(response, {
-        id: uuid,
-        type: "textResponse",
-        textResponse:
-          "I do not have enough information to answer that. Try another question.",
-        sources: [],
-        close: true,
-        error: null,
-      });
-      return;
-    }
-
-    // If there are no embeddings - chat like a normal LLM chat interface.
-    return await streamEmptyEmbeddingChat({
-      response,
-      uuid,
-      sessionId,
-      message,
-      embed,
-      messageLimit,
-      LLMConnector,
-    });
+  // User is trying to query-mode chat a workspace that has no data in it - so
+  // we should exit early as no information can be found under these conditions.
+  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse:
+        "I do not have enough information to answer that. Try another question.",
+      sources: [],
+      close: true,
+      error: null,
+    });
+    return;
   }

   let completeText;
@@ -77,17 +70,24 @@ async function streamChatWithForEmbed(
     messageLimit,
     chatMode
   );
   const {
     contextTexts = [],
     sources = [],
     message: error,
-  } = await VectorDb.performSimilaritySearch({
-    namespace: embed.workspace.slug,
-    input: message,
-    LLMConnector,
-    similarityThreshold: embed.workspace?.similarityThreshold,
-    topN: embed.workspace?.topN,
-  });
+  } = embeddingsCount !== 0 // if there no embeddings don't bother searching.
+    ? await VectorDb.performSimilaritySearch({
+        namespace: embed.workspace.slug,
+        input: message,
+        LLMConnector,
+        similarityThreshold: embed.workspace?.similarityThreshold,
+        topN: embed.workspace?.topN,
+      })
+    : {
+        contextTexts: [],
+        sources: [],
+        message: null,
+      };

   // Failed similarity search.
   if (!!error) {
@@ -176,7 +176,7 @@ async function recentEmbedChatHistory(
   messageLimit = 20,
   chatMode = null
 ) {
-  if (chatMode === "query") return [];
+  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
   const rawHistory = (
     await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
       id: "desc",
@@ -185,65 +185,6 @@ async function recentEmbedChatHistory(
   return { rawHistory, chatHistory: convertToPromptHistory(rawHistory) };
 }

-async function streamEmptyEmbeddingChat({
-  response,
-  uuid,
-  sessionId,
-  message,
-  embed,
-  messageLimit,
-  LLMConnector,
-}) {
-  let completeText;
-  const { rawHistory, chatHistory } = await recentEmbedChatHistory(
-    sessionId,
-    embed,
-    messageLimit
-  );
-
-  if (LLMConnector.streamingEnabled() !== true) {
-    console.log(
-      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
-    );
-    completeText = await LLMConnector.sendChat(
-      chatHistory,
-      message,
-      embed.workspace,
-      rawHistory
-    );
-    writeResponseChunk(response, {
-      uuid,
-      type: "textResponseChunk",
-      textResponse: completeText,
-      sources: [],
-      close: true,
-      error: false,
-    });
-  }
-
-  const stream = await LLMConnector.streamChat(
-    chatHistory,
-    message,
-    embed.workspace,
-    rawHistory
-  );
-  completeText = await LLMConnector.handleStream(response, stream, {
-    uuid,
-    sources: [],
-  });
-
-  await EmbedChats.new({
-    embedId: embed.id,
-    prompt: message,
-    response: { text: completeText, type: "chat" },
-    connection_information: response.locals.connection
-      ? { ...response.locals.connection }
-      : {},
-    sessionId,
-  });
-  return;
-}
-
 module.exports = {
   streamChatWithForEmbed,
 };
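The behavioral change above is that the embed path no longer branches into a separate streamEmptyEmbeddingChat helper; it simply skips the vector search when nothing is embedded and continues down the one shared path. A minimal, self-contained sketch of that fallback pattern, where runSearch is a hypothetical stand-in for VectorDb.performSimilaritySearch:

// Sketch of the "only search when embeddings exist" pattern (runSearch is a stand-in).
async function searchOrEmpty({ embeddingsCount, runSearch }) {
  return embeddingsCount !== 0
    ? await runSearch()
    : { contextTexts: [], sources: [], message: null };
}

// Example usage with a fake search function:
searchOrEmpty({
  embeddingsCount: 0,
  runSearch: async () => ({ contextTexts: ["..."], sources: [{ title: "doc" }], message: null }),
}).then((result) => console.log(result)); // -> empty context, no sources, no error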

View File

@@ -1,46 +1,8 @@
 const { v4: uuidv4 } = require("uuid");
 const { WorkspaceChats } = require("../../models/workspaceChats");
 const { resetMemory } = require("./commands/reset");
-const moment = require("moment");
 const { getVectorDbClass, getLLMProvider } = require("../helpers");
+const { convertToPromptHistory } = require("../helpers/chat/responses");

-function convertToChatHistory(history = []) {
-  const formattedHistory = [];
-  history.forEach((history) => {
-    const { prompt, response, createdAt, feedbackScore = null, id } = history;
-    const data = JSON.parse(response);
-    formattedHistory.push([
-      {
-        role: "user",
-        content: prompt,
-        sentAt: moment(createdAt).unix(),
-      },
-      {
-        role: "assistant",
-        content: data.text,
-        sources: data.sources || [],
-        chatId: id,
-        sentAt: moment(createdAt).unix(),
-        feedbackScore,
-      },
-    ]);
-  });
-  return formattedHistory.flat();
-}
-
-function convertToPromptHistory(history = []) {
-  const formattedHistory = [];
-  history.forEach((history) => {
-    const { prompt, response } = history;
-    const data = JSON.parse(response);
-    formattedHistory.push([
-      { role: "user", content: prompt },
-      { role: "assistant", content: data.text },
-    ]);
-  });
-  return formattedHistory.flat();
-}
-
 const VALID_COMMANDS = {
   "/reset": resetMemory,
@@ -64,7 +26,8 @@ async function chatWithWorkspace(
   workspace,
   message,
   chatMode = "chat",
-  user = null
+  user = null,
+  thread = null
 ) {
   const uuid = uuidv4();
   const command = grepCommand(message);
@@ -92,49 +55,51 @@ async function chatWithWorkspace(
   const messageLimit = workspace?.openAiHistory || 20;
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
-  if (!hasVectorizedSpace || embeddingsCount === 0) {
-    if (chatMode === "query") {
-      return {
-        id: uuid,
-        type: "textResponse",
-        sources: [],
-        close: true,
-        error: null,
-        textResponse:
-          "There is no relevant information in this workspace to answer your query.",
-      };
-    }
-
-    // If there are no embeddings - chat like a normal LLM chat interface.
-    return await emptyEmbeddingChat({
-      uuid,
-      user,
-      message,
-      workspace,
-      messageLimit,
-      LLMConnector,
-    });
+  // User is trying to query-mode chat a workspace that has no data in it - so
+  // we should exit early as no information can be found under these conditions.
+  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
+    return {
+      id: uuid,
+      type: "textResponse",
+      sources: [],
+      close: true,
+      error: null,
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+    };
   }

-  const { rawHistory, chatHistory } = await recentChatHistory(
+  // If we are here we know that we are in a workspace that is:
+  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
+  // 2. Chatting in "query" mode and has at least 1 embedding
+  const { rawHistory, chatHistory } = await recentChatHistory({
     user,
     workspace,
+    thread,
     messageLimit,
-    chatMode
-  );
+    chatMode,
+  });

   const {
     contextTexts = [],
     sources = [],
     message: error,
-  } = await VectorDb.performSimilaritySearch({
-    namespace: workspace.slug,
-    input: message,
-    LLMConnector,
-    similarityThreshold: workspace?.similarityThreshold,
-    topN: workspace?.topN,
-  });
+  } = embeddingsCount !== 0 // if there no embeddings don't bother searching.
+    ? await VectorDb.performSimilaritySearch({
+        namespace: workspace.slug,
+        input: message,
+        LLMConnector,
+        similarityThreshold: workspace?.similarityThreshold,
+        topN: workspace?.topN,
+      })
+    : {
+        contextTexts: [],
+        sources: [],
+        message: null,
+      };

-  // Failed similarity search.
+  // Failed similarity search if it was run at all and failed.
   if (!!error) {
     return {
       id: uuid,
@@ -147,7 +112,7 @@ async function chatWithWorkspace(
   }

   // If in query mode and no sources are found, do not
-  // let the LLM try to hallucinate a response or use general knowledge
+  // let the LLM try to hallucinate a response or use general knowledge and exit early
   if (chatMode === "query" && sources.length === 0) {
     return {
       id: uuid,
@@ -160,7 +125,7 @@ async function chatWithWorkspace(
     };
   }

-  // Compress message to ensure prompt passes token limit with room for response
+  // Compress & Assemble message to ensure prompt passes token limit with room for response
   // and build system messages based on inputs and history.
   const messages = await LLMConnector.compressMessages(
     {
@@ -187,10 +152,12 @@ async function chatWithWorkspace(
       error: "No text completion could be completed with this input.",
     };
   }
+
   const { chat } = await WorkspaceChats.new({
     workspaceId: workspace.id,
     prompt: message,
     response: { text: textResponse, sources, type: chatMode },
+    threadId: thread?.id || null,
     user,
   });
   return {
@@ -204,41 +171,14 @@ async function chatWithWorkspace(
   };
 }

-// On query we dont return message history. All other chat modes and when chatting
-// with no embeddings we return history.
-// TODO: Refactor to just run a .where on WorkspaceChat to simplify what is going on here.
-// see recentThreadChatHistory
-async function recentChatHistory(
+async function recentChatHistory({
   user = null,
   workspace,
+  thread = null,
   messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return [];
-  const rawHistory = (
-    user
-      ? await WorkspaceChats.forWorkspaceByUser(
-          workspace.id,
-          user.id,
-          messageLimit,
-          { id: "desc" }
-        )
-      : await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
-          id: "desc",
-        })
-  ).reverse();
-  return { rawHistory, chatHistory: convertToPromptHistory(rawHistory) };
-}
-
-// Extension of recentChatHistory that supports threads
-async function recentThreadChatHistory(
-  user = null,
-  workspace,
-  thread,
-  messageLimit = 20,
-  chatMode = null
-) {
-  if (chatMode === "query") return [];
+  chatMode = null,
+}) {
+  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
   const rawHistory = (
     await WorkspaceChats.where(
       {
@@ -254,42 +194,6 @@ async function recentThreadChatHistory(
   return { rawHistory, chatHistory: convertToPromptHistory(rawHistory) };
 }

-async function emptyEmbeddingChat({
-  uuid,
-  user,
-  message,
-  workspace,
-  messageLimit,
-  LLMConnector,
-}) {
-  const { rawHistory, chatHistory } = await recentChatHistory(
-    user,
-    workspace,
-    messageLimit
-  );
-  const textResponse = await LLMConnector.sendChat(
-    chatHistory,
-    message,
-    workspace,
-    rawHistory
-  );
-  const { chat } = await WorkspaceChats.new({
-    workspaceId: workspace.id,
-    prompt: message,
-    response: { text: textResponse, sources: [], type: "chat" },
-    user,
-  });
-
-  return {
-    id: uuid,
-    type: "textResponse",
-    sources: [],
-    close: true,
-    error: null,
-    chatId: chat.id,
-    textResponse,
-  };
-}
-
 function chatPrompt(workspace) {
   return (
     workspace?.openAiPrompt ??
@@ -299,9 +203,6 @@ function chatPrompt(workspace) {
 module.exports = {
   recentChatHistory,
-  recentThreadChatHistory,
-  convertToPromptHistory,
-  convertToChatHistory,
   chatWithWorkspace,
   chatPrompt,
   grepCommand,
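After this change recentChatHistory takes a single options object (with thread folded in, replacing recentThreadChatHistory) and always resolves to a { rawHistory, chatHistory } pair, even in query mode where both arrays are empty. A simplified stub mirroring that call shape, with the database lookup elided (the real body queries WorkspaceChats.where as shown in the hunk above):

// Hedged sketch of the new recentChatHistory call shape; the stub body is not the real implementation.
async function recentChatHistoryStub({
  user = null,
  workspace,
  thread = null,
  messageLimit = 20,
  chatMode = null,
}) {
  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
  // Real code: WorkspaceChats.where(...) scoped by workspace/user/thread, then convertToPromptHistory.
  return { rawHistory: [], chatHistory: [] };
}

recentChatHistoryStub({ workspace: { id: 1, slug: "demo" }, thread: null, chatMode: "query" }).then(
  ({ rawHistory, chatHistory }) => console.log(rawHistory.length, chatHistory.length) // 0 0
);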

View File

@@ -1,19 +1,15 @@
 const { v4: uuidv4 } = require("uuid");
 const { WorkspaceChats } = require("../../models/workspaceChats");
 const { getVectorDbClass, getLLMProvider } = require("../helpers");
+const { writeResponseChunk } = require("../helpers/chat/responses");
 const {
   grepCommand,
-  recentChatHistory,
   VALID_COMMANDS,
   chatPrompt,
-  recentThreadChatHistory,
-} = require(".");
+  recentChatHistory,
+} = require("./index");
 const VALID_CHAT_MODE = ["chat", "query"];

-function writeResponseChunk(response, data) {
-  response.write(`data: ${JSON.stringify(data)}\n\n`);
-  return;
-}
-
 async function streamChatWithWorkspace(
   response,
@@ -58,59 +54,53 @@ async function streamChatWithWorkspace(
   const messageLimit = workspace?.openAiHistory || 20;
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
-  if (!hasVectorizedSpace || embeddingsCount === 0) {
-    if (chatMode === "query") {
-      writeResponseChunk(response, {
-        id: uuid,
-        type: "textResponse",
-        textResponse:
-          "There is no relevant information in this workspace to answer your query.",
-        sources: [],
-        close: true,
-        error: null,
-      });
-      return;
-    }
-
-    // If there are no embeddings - chat like a normal LLM chat interface.
-    // no need to pass in chat mode - because if we are here we are in
-    // "chat" mode + have embeddings.
-    return await streamEmptyEmbeddingChat({
-      response,
-      uuid,
-      user,
-      message,
-      workspace,
-      messageLimit,
-      LLMConnector,
-      thread,
-    });
+  // User is trying to query-mode chat a workspace that has no data in it - so
+  // we should exit early as no information can be found under these conditions.
+  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+      sources: [],
+      close: true,
+      error: null,
+    });
+    return;
   }

+  // If we are here we know that we are in a workspace that is:
+  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
+  // 2. Chatting in "query" mode and has at least 1 embedding
   let completeText;
-  const { rawHistory, chatHistory } = thread
-    ? await recentThreadChatHistory(
-        user,
-        workspace,
-        thread,
-        messageLimit,
-        chatMode
-      )
-    : await recentChatHistory(user, workspace, messageLimit, chatMode);
+  const { rawHistory, chatHistory } = await recentChatHistory({
+    user,
+    workspace,
+    thread,
+    messageLimit,
+    chatMode,
+  });

   const {
     contextTexts = [],
     sources = [],
     message: error,
-  } = await VectorDb.performSimilaritySearch({
-    namespace: workspace.slug,
-    input: message,
-    LLMConnector,
-    similarityThreshold: workspace?.similarityThreshold,
-    topN: workspace?.topN,
-  });
+  } = embeddingsCount !== 0 // if there no embeddings don't bother searching.
+    ? await VectorDb.performSimilaritySearch({
+        namespace: workspace.slug,
+        input: message,
+        LLMConnector,
+        similarityThreshold: workspace?.similarityThreshold,
+        topN: workspace?.topN,
+      })
+    : {
+        contextTexts: [],
+        sources: [],
+        message: null,
+      };

-  // Failed similarity search.
+  // Failed similarity search if it was run at all and failed.
   if (!!error) {
     writeResponseChunk(response, {
       id: uuid,
@@ -124,7 +114,7 @@ async function streamChatWithWorkspace(
   }

   // If in query mode and no sources are found, do not
-  // let the LLM try to hallucinate a response or use general knowledge
+  // let the LLM try to hallucinate a response or use general knowledge and exit early
   if (chatMode === "query" && sources.length === 0) {
     writeResponseChunk(response, {
       id: uuid,
@@ -138,7 +128,7 @@ async function streamChatWithWorkspace(
     return;
   }

-  // Compress message to ensure prompt passes token limit with room for response
+  // Compress & Assemble message to ensure prompt passes token limit with room for response
   // and build system messages based on inputs and history.
   const messages = await LLMConnector.compressMessages(
     {
@@ -181,7 +171,7 @@ async function streamChatWithWorkspace(
     workspaceId: workspace.id,
     prompt: message,
     response: { text: completeText, sources, type: chatMode },
-    threadId: thread?.id,
+    threadId: thread?.id || null,
     user,
   });
@@ -195,166 +185,7 @@ async function streamChatWithWorkspace(
   return;
 }

-async function streamEmptyEmbeddingChat({
-  response,
-  uuid,
-  user,
-  message,
-  workspace,
-  messageLimit,
-  LLMConnector,
-  thread = null,
-}) {
-  let completeText;
-  const { rawHistory, chatHistory } = thread
-    ? await recentThreadChatHistory(user, workspace, thread, messageLimit)
-    : await recentChatHistory(user, workspace, messageLimit);
-
-  // If streaming is not explicitly enabled for connector
-  // we do regular waiting of a response and send a single chunk.
-  if (LLMConnector.streamingEnabled() !== true) {
-    console.log(
-      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
-    );
-    completeText = await LLMConnector.sendChat(
-      chatHistory,
-      message,
-      workspace,
-      rawHistory
-    );
-    writeResponseChunk(response, {
-      uuid,
-      type: "textResponseChunk",
-      textResponse: completeText,
-      sources: [],
-      close: true,
-      error: false,
-    });
-  } else {
-    const stream = await LLMConnector.streamChat(
-      chatHistory,
-      message,
-      workspace,
-      rawHistory
-    );
-    completeText = await LLMConnector.handleStream(response, stream, {
-      uuid,
-      sources: [],
-    });
-  }
-
-  const { chat } = await WorkspaceChats.new({
-    workspaceId: workspace.id,
-    prompt: message,
-    response: { text: completeText, sources: [], type: "chat" },
-    threadId: thread?.id,
-    user,
-  });
-
-  writeResponseChunk(response, {
-    uuid,
-    type: "finalizeResponseStream",
-    close: true,
-    error: false,
-    chatId: chat.id,
-  });
-  return;
-}
-
-// The default way to handle a stream response. Functions best with OpenAI.
-function handleDefaultStreamResponse(response, stream, responseProps) {
-  const { uuid = uuidv4(), sources = [] } = responseProps;
-
-  return new Promise((resolve) => {
-    let fullText = "";
-    let chunk = "";
-    stream.data.on("data", (data) => {
-      const lines = data
-        ?.toString()
-        ?.split("\n")
-        .filter((line) => line.trim() !== "");
-
-      for (const line of lines) {
-        let validJSON = false;
-        const message = chunk + line.replace(/^data: /, "");
-
-        // JSON chunk is incomplete and has not ended yet
-        // so we need to stitch it together. You would think JSON
-        // chunks would only come complete - but they don't!
-        try {
-          JSON.parse(message);
-          validJSON = true;
-        } catch {}
-
-        if (!validJSON) {
-          // It can be possible that the chunk decoding is running away
-          // and the message chunk fails to append due to string length.
-          // In this case abort the chunk and reset so we can continue.
-          // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
-          try {
-            chunk += message;
-          } catch (e) {
-            console.error(`Chunk appending error`, e);
-            chunk = "";
-          }
-          continue;
-        } else {
-          chunk = "";
-        }
-
-        if (message == "[DONE]") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          resolve(fullText);
-        } else {
-          let finishReason = null;
-          let token = "";
-          try {
-            const json = JSON.parse(message);
-            token = json?.choices?.[0]?.delta?.content;
-            finishReason = json?.choices?.[0]?.finish_reason || null;
-          } catch {
-            continue;
-          }
-
-          if (token) {
-            fullText += token;
-            writeResponseChunk(response, {
-              uuid,
-              sources: [],
-              type: "textResponseChunk",
-              textResponse: token,
-              close: false,
-              error: false,
-            });
-          }
-
-          if (finishReason !== null) {
-            writeResponseChunk(response, {
-              uuid,
-              sources,
-              type: "textResponseChunk",
-              textResponse: "",
-              close: true,
-              error: false,
-            });
-            resolve(fullText);
-          }
-        }
-      }
-    });
-  });
-}
-
 module.exports = {
   VALID_CHAT_MODE,
   streamChatWithWorkspace,
-  writeResponseChunk,
-  handleDefaultStreamResponse,
 };
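writeResponseChunk, which this file previously defined and exported, now lives in ../helpers/chat/responses and is imported at the top of the file. Its behavior is unchanged: it writes one Server-Sent Events frame per call. A tiny standalone demo with a fake response object (the fakeResponse stand-in is invented for the example):

// writeResponseChunk as defined in the diff; fakeResponse mimics an Express response's write().
function writeResponseChunk(response, data) {
  response.write(`data: ${JSON.stringify(data)}\n\n`);
}

const fakeResponse = { write: (frame) => process.stdout.write(frame) };
writeResponseChunk(fakeResponse, {
  uuid: "demo-uuid",
  type: "textResponseChunk",
  textResponse: "Hello",
  sources: [],
  close: false,
  error: false,
});
// prints: data: {"uuid":"demo-uuid","type":"textResponseChunk","textResponse":"Hello",...}\n\n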

View File

@@ -1,5 +1,5 @@
-const { convertToPromptHistory } = require("../../chats");
 const { TokenManager } = require("../tiktoken");
+const { convertToPromptHistory } = require("./responses");

 /*
  What is the message Array compressor?

View File

@@ -0,0 +1,144 @@
const { v4: uuidv4 } = require("uuid");
const moment = require("moment");

// The default way to handle a stream response. Functions best with OpenAI.
// Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
function handleDefaultStreamResponse(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;

  return new Promise((resolve) => {
    let fullText = "";
    let chunk = "";
    stream.data.on("data", (data) => {
      const lines = data
        ?.toString()
        ?.split("\n")
        .filter((line) => line.trim() !== "");

      for (const line of lines) {
        let validJSON = false;
        const message = chunk + line.replace(/^data: /, "");

        // JSON chunk is incomplete and has not ended yet
        // so we need to stitch it together. You would think JSON
        // chunks would only come complete - but they don't!
        try {
          JSON.parse(message);
          validJSON = true;
        } catch {}

        if (!validJSON) {
          // It can be possible that the chunk decoding is running away
          // and the message chunk fails to append due to string length.
          // In this case abort the chunk and reset so we can continue.
          // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
          try {
            chunk += message;
          } catch (e) {
            console.error(`Chunk appending error`, e);
            chunk = "";
          }
          continue;
        } else {
          chunk = "";
        }

        if (message == "[DONE]") {
          writeResponseChunk(response, {
            uuid,
            sources,
            type: "textResponseChunk",
            textResponse: "",
            close: true,
            error: false,
          });
          resolve(fullText);
        } else {
          let finishReason = null;
          let token = "";
          try {
            const json = JSON.parse(message);
            token = json?.choices?.[0]?.delta?.content;
            finishReason = json?.choices?.[0]?.finish_reason || null;
          } catch {
            continue;
          }

          if (token) {
            fullText += token;
            writeResponseChunk(response, {
              uuid,
              sources: [],
              type: "textResponseChunk",
              textResponse: token,
              close: false,
              error: false,
            });
          }

          if (finishReason !== null) {
            writeResponseChunk(response, {
              uuid,
              sources,
              type: "textResponseChunk",
              textResponse: "",
              close: true,
              error: false,
            });
            resolve(fullText);
          }
        }
      }
    });
  });
}

function convertToChatHistory(history = []) {
  const formattedHistory = [];
  history.forEach((history) => {
    const { prompt, response, createdAt, feedbackScore = null, id } = history;
    const data = JSON.parse(response);
    formattedHistory.push([
      {
        role: "user",
        content: prompt,
        sentAt: moment(createdAt).unix(),
      },
      {
        role: "assistant",
        content: data.text,
        sources: data.sources || [],
        chatId: id,
        sentAt: moment(createdAt).unix(),
        feedbackScore,
      },
    ]);
  });
  return formattedHistory.flat();
}

function convertToPromptHistory(history = []) {
  const formattedHistory = [];
  history.forEach((history) => {
    const { prompt, response } = history;
    const data = JSON.parse(response);
    formattedHistory.push([
      { role: "user", content: prompt },
      { role: "assistant", content: data.text },
    ]);
  });
  return formattedHistory.flat();
}

function writeResponseChunk(response, data) {
  response.write(`data: ${JSON.stringify(data)}\n\n`);
  return;
}

module.exports = {
  handleDefaultStreamResponse,
  convertToChatHistory,
  convertToPromptHistory,
  writeResponseChunk,
};
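For reference, a small usage sketch of the two converters defined in this new module. The sample row below is invented, but its fields match what the functions above destructure (prompt, a JSON response column, createdAt, feedbackScore, id); the require path assumes the module lives at utils/helpers/chat/responses.js as the imports elsewhere in the diff suggest.

// Usage sketch (sample data is made up).
const {
  convertToChatHistory,
  convertToPromptHistory,
} = require("./utils/helpers/chat/responses");

const sampleRow = {
  id: 1,
  prompt: "What did the Q4 report say about revenue?",
  response: JSON.stringify({ text: "Revenue grew 12% year over year.", sources: [] }),
  createdAt: "2024-02-14T20:32:07.000Z",
  feedbackScore: null,
};

console.log(convertToPromptHistory([sampleRow]));
// -> [ { role: "user", content: "..." }, { role: "assistant", content: "..." } ]

console.log(convertToChatHistory([sampleRow]));
// -> same pair, each with sentAt (unix seconds); the assistant turn also carries sources, chatId, and feedbackScore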