anything-llm/server/utils/chats/stream.js

const { v4: uuidv4 } = require("uuid");
const { WorkspaceChats } = require("../../models/workspaceChats");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses");
const {
  grepCommand,
  VALID_COMMANDS,
  chatPrompt,
  recentChatHistory,
} = require("./index");

/**
 * Chat mode names exported by this module.
 *
 * `streamChatWithWorkspace` defaults to "chat" and special-cases "query":
 * in "query" mode it replies with a fixed "no relevant information" message
 * instead of calling the LLM when the workspace yields no sources.
 */
const VALID_CHAT_MODE = [
  "chat",
  "query",
];

/**
 * Builds the terminal "abort" chunk written when a chat cannot proceed.
 *
 * NOTE(review): early-exit chunks carry the request identifier under `id`,
 * while the completion and finalize chunks use `uuid`. The key is kept as-is
 * here - confirm which one consumers actually read.
 *
 * @param {string} uuid - Identifier generated for this chat request.
 * @param {string} error - Reason the chat was aborted.
 * @returns {object} Payload for `writeResponseChunk`.
 */
function buildAbortChunk(uuid, error) {
  return {
    id: uuid,
    type: "abort",
    textResponse: null,
    sources: [],
    close: true,
    error,
  };
}

/**
 * Builds the terminal chunk written in "query" mode when the workspace has
 * nothing to answer from, so the LLM is never asked to answer without context.
 *
 * @param {string} uuid - Identifier generated for this chat request.
 * @returns {object} Payload for `writeResponseChunk`.
 */
function buildNoContextChunk(uuid) {
  return {
    id: uuid,
    type: "textResponse",
    textResponse:
      "There is no relevant information in this workspace to answer your query.",
    sources: [],
    close: true,
    error: null,
  };
}

/**
 * Handles one chat message for a workspace and writes the result to
 * `response` as one or more chunks via `writeResponseChunk`.
 *
 * Flow:
 *  1. A recognized command is dispatched to its handler and its result is
 *     written as the only chunk.
 *  2. The message is checked with `LLMConnector.isSafe`; unsafe messages abort.
 *  3. In "query" mode, a workspace with no namespace or no embeddings gets a
 *     fixed "no relevant information" reply.
 *  4. Recent history is loaded and, when embeddings exist, a similarity search
 *     is run. A search error aborts; "query" mode with zero sources gets the
 *     fixed "no relevant information" reply.
 *  5. The prompt is compressed and sent to the LLM, streamed when the
 *     connector reports streaming as enabled, otherwise as a single chunk.
 *  6. The exchange is stored with `WorkspaceChats.new` and a
 *     "finalizeResponseStream" chunk carrying the stored chat id is written.
 *
 * No errors are caught here: a rejection from any awaited collaborator
 * propagates to the caller.
 *
 * @param {object} response - Passed through to `writeResponseChunk` and
 *   `LLMConnector.handleStream`.
 * @param {object} workspace - Workspace record; this function reads `id`,
 *   `slug`, `chatModel`, `openAiHistory`, `openAiTemp`, `similarityThreshold`
 *   and `topN`.
 * @param {string} message - The user's message.
 * @param {string} [chatMode="chat"] - "chat" or "query".
 * @param {object|null} [user=null] - Forwarded to command handlers, history
 *   lookup and chat storage.
 * @param {object|null} [thread=null] - Forwarded likewise; `thread.id` is
 *   stored with the chat when present.
 * @returns {Promise<void>}
 */
async function streamChatWithWorkspace(
  response,
  workspace,
  message,
  chatMode = "chat",
  user = null,
  thread = null
) {
  const uuid = uuidv4();
  const isQueryMode = chatMode === "query";

  // Recognized commands are served by their own handler and never reach the LLM.
  const command = grepCommand(message);
  if (command && Object.keys(VALID_COMMANDS).includes(command)) {
    const data = await VALID_COMMANDS[command](
      workspace,
      message,
      uuid,
      user,
      thread
    );
    writeResponseChunk(response, data);
    return;
  }

  const LLMConnector = getLLMProvider(workspace?.chatModel);
  const VectorDb = getVectorDbClass();

  const { safe, reasons = [] } = await LLMConnector.isSafe(message);
  if (!safe) {
    writeResponseChunk(
      response,
      buildAbortChunk(
        uuid,
        `This message was moderated and will not be allowed. Violations for ${reasons.join(", ")} found.`
      )
    );
    return;
  }

  // NOTE(review): `||` also replaces a configured limit of 0 with 20 - confirm
  // that is intended before switching to `??`.
  const messageLimit = workspace?.openAiHistory || 20;

  // Neither lookup depends on the other, so run them concurrently.
  const [hasVectorizedSpace, embeddingsCount] = await Promise.all([
    VectorDb.hasNamespace(workspace.slug),
    VectorDb.namespaceCount(workspace.slug),
  ]);

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we exit early as no information can be found under these conditions.
  if (isQueryMode && (!hasVectorizedSpace || embeddingsCount === 0)) {
    writeResponseChunk(response, buildNoContextChunk(uuid));
    return;
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  const { rawHistory, chatHistory } = await recentChatHistory({
    user,
    workspace,
    thread,
    messageLimit,
    chatMode,
  });

  // If there are no embeddings, don't bother searching.
  const searchResult =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: message,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
        })
      : { contextTexts: [], sources: [], message: null };
  const { contextTexts = [], sources = [], message: error } = searchResult;

  // The similarity search ran and reported a failure.
  if (error) {
    writeResponseChunk(response, buildAbortChunk(uuid, error));
    return;
  }

  // If in query mode and no sources are found, do not let the LLM try to
  // hallucinate a response or use general knowledge - exit early.
  if (isQueryMode && sources.length === 0) {
    writeResponseChunk(response, buildNoContextChunk(uuid));
    return;
  }

  // Compress & assemble the messages so the prompt fits the token limit with
  // room for a response, and build system messages from inputs and history.
  const messages = await LLMConnector.compressMessages(
    {
      systemPrompt: chatPrompt(workspace),
      userPrompt: message,
      contextTexts,
      chatHistory,
    },
    rawHistory
  );

  // `??` keeps an explicit workspace temperature of 0.
  const completionOptions = {
    temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  };

  let completeText;
  if (LLMConnector.streamingEnabled() !== true) {
    // Streaming is not explicitly enabled for this connector: wait for the
    // whole response and send it as a single chunk.
    console.log(
      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
    );
    completeText = await LLMConnector.getChatCompletion(
      messages,
      completionOptions
    );
    writeResponseChunk(response, {
      uuid,
      sources,
      type: "textResponseChunk",
      textResponse: completeText,
      close: true,
      error: false,
    });
  } else {
    const stream = await LLMConnector.streamGetChatCompletion(
      messages,
      completionOptions
    );
    // The connector receives `response` along with the stream; its resolved
    // value is used as the complete response text.
    completeText = await LLMConnector.handleStream(response, stream, {
      uuid,
      sources,
    });
  }

  const { chat } = await WorkspaceChats.new({
    workspaceId: workspace.id,
    prompt: message,
    response: { text: completeText, sources, type: chatMode },
    threadId: thread?.id || null,
    user,
  });

  writeResponseChunk(response, {
    uuid,
    type: "finalizeResponseStream",
    close: true,
    error: false,
    chatId: chat.id,
  });
}

// Public surface of this module: the chat mode list and the streaming chat
// entry point.
module.exports = {
  VALID_CHAT_MODE,
  streamChatWithWorkspace,
};
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`const { v4: uuidv4 } = require("uuid");`
			`const { WorkspaceChats } = require("../../models/workspaceChats");`
			`const { getVectorDbClass, getLLMProvider } = require("../helpers");`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`const { writeResponseChunk } = require("../helpers/chat/responses");`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`const {`
			`grepCommand,`
			`VALID_COMMANDS,`
			`chatPrompt,`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`recentChatHistory,`
			`} = require("./index");`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00
Implement streaming for workspace chats via API (#604) 2024-01-16 19:37:46 +01:00			`const VALID_CHAT_MODE = ["chat", "query"];`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00
			`async function streamChatWithWorkspace(`
			`response,`
			`workspace,`
			`message,`
			`chatMode = "chat",`
Implement workspace threading that is backwards compatible (#699) * Implement workspace thread that is compatible with legacy versions * last touches * comment on chat qty enforcement 2024-02-09 03:37:22 +01:00			`user = null,`
			`thread = null`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`) {`
			`const uuid = uuidv4();`
			`const command = grepCommand(message);`

			`if (!!command && Object.keys(VALID_COMMANDS).includes(command)) {`
Implement workspace threading that is backwards compatible (#699) * Implement workspace thread that is compatible with legacy versions * last touches * comment on chat qty enforcement 2024-02-09 03:37:22 +01:00			`const data = await VALID_COMMANDS[command](`
			`workspace,`
			`message,`
			`uuid,`
			`user,`
			`thread`
			`);`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`writeResponseChunk(response, data);`
			`return;`
			`}`

Per workspace model selection (#582) * WIP model selection per workspace (migrations and openai saves properly * revert OpenAiOption * add support for models per workspace for anthropic, localAi, ollama, openAi, and togetherAi * remove unneeded comments * update logic for when LLMProvider is reset, reset Ai provider files with master * remove frontend/api reset of workspace chat and move logic to updateENV add postUpdate callbacks to envs * set preferred model for chat on class instantiation * remove extra param * linting * remove unused var * refactor chat model selection on workspace * linting * add fallback for base path to localai models --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> 2024-01-17 21:59:25 +01:00			`const LLMConnector = getLLMProvider(workspace?.chatModel);`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`const VectorDb = getVectorDbClass();`
			`const { safe, reasons = [] } = await LLMConnector.isSafe(message);`
			`if (!safe) {`
			`writeResponseChunk(response, {`
			`id: uuid,`
			`type: "abort",`
			`textResponse: null,`
			`sources: [],`
			`close: true,`
			error: `This message was moderated and will not be allowed. Violations for ${reasons.join(
			`", "`
			)} found.`,
			`});`
			`return;`
			`}`

			`const messageLimit = workspace?.openAiHistory \|\| 20;`
			`const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);`
			`const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);`
[Feat] Query mode to return no-result when no context found (#601) * Query mode to return no-result when no context found * update default error for sync chat * remove unnecessary type conversion 2024-01-16 18:32:51 +01:00
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`// User is trying to query-mode chat a workspace that has no data in it - so`
			`// we should exit early as no information can be found under these conditions.`
			`if ((!hasVectorizedSpace \|\| embeddingsCount === 0) && chatMode === "query") {`
			`writeResponseChunk(response, {`
			`id: uuid,`
			`type: "textResponse",`
			`textResponse:`
			`"There is no relevant information in this workspace to answer your query.",`
			`sources: [],`
			`close: true,`
			`error: null,`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`});`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`return;`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`}`

Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`// If we are here we know that we are in a workspace that is:`
			`// 1. Chatting in "chat" mode and may or may _not_ have embeddings`
			`// 2. Chatting in "query" mode and has at least 1 embedding`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`let completeText;`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`const { rawHistory, chatHistory } = await recentChatHistory({`
			`user,`
			`workspace,`
			`thread,`
			`messageLimit,`
			`chatMode,`
			`});`
Implement workspace threading that is backwards compatible (#699) * Implement workspace thread that is compatible with legacy versions * last touches * comment on chat qty enforcement 2024-02-09 03:37:22 +01:00
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`const {`
			`contextTexts = [],`
			`sources = [],`
			`message: error,`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`} = embeddingsCount !== 0 // if there no embeddings don't bother searching.`
			`? await VectorDb.performSimilaritySearch({`
			`namespace: workspace.slug,`
			`input: message,`
			`LLMConnector,`
			`similarityThreshold: workspace?.similarityThreshold,`
			`topN: workspace?.topN,`
			`})`
			`: {`
			`contextTexts: [],`
			`sources: [],`
			`message: null,`
			`};`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`// Failed similarity search if it was run at all and failed.`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`if (!!error) {`
			`writeResponseChunk(response, {`
			`id: uuid,`
			`type: "abort",`
			`textResponse: null,`
			`sources: [],`
			`close: true,`
			`error,`
			`});`
			`return;`
			`}`

[Feat] Query mode to return no-result when no context found (#601) * Query mode to return no-result when no context found * update default error for sync chat * remove unnecessary type conversion 2024-01-16 18:32:51 +01:00			`// If in query mode and no sources are found, do not`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`// let the LLM try to hallucinate a response or use general knowledge and exit early`
[Feat] Query mode to return no-result when no context found (#601) * Query mode to return no-result when no context found * update default error for sync chat * remove unnecessary type conversion 2024-01-16 18:32:51 +01:00			`if (chatMode === "query" && sources.length === 0) {`
			`writeResponseChunk(response, {`
			`id: uuid,`
			`type: "textResponse",`
			`textResponse:`
			`"There is no relevant information in this workspace to answer your query.",`
			`sources: [],`
			`close: true,`
			`error: null,`
			`});`
			`return;`
			`}`

Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`// Compress & Assemble message to ensure prompt passes token limit with room for response`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`// and build system messages based on inputs and history.`
			`const messages = await LLMConnector.compressMessages(`
			`{`
			`systemPrompt: chatPrompt(workspace),`
			`userPrompt: message,`
			`contextTexts,`
			`chatHistory,`
			`},`
			`rawHistory`
			`);`

			`// If streaming is not explicitly enabled for connector`
			`// we do regular waiting of a response and send a single chunk.`
			`if (LLMConnector.streamingEnabled() !== true) {`
			`console.log(`
			`\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
			`);`
			`completeText = await LLMConnector.getChatCompletion(messages, {`
add support for mistral api (#610) * add support for mistral api * update docs to show support for Mistral * add default temp to all providers, suggest different results per provider --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> 2024-01-17 23:42:05 +01:00			`temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`});`
			`writeResponseChunk(response, {`
			`uuid,`
			`sources,`
			`type: "textResponseChunk",`
			`textResponse: completeText,`
			`close: true,`
			`error: false,`
			`});`
			`} else {`
			`const stream = await LLMConnector.streamGetChatCompletion(messages, {`
add support for mistral api (#610) * add support for mistral api * update docs to show support for Mistral * add default temp to all providers, suggest different results per provider --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> 2024-01-17 23:42:05 +01:00			`temperature: workspace?.openAiTemp ?? LLMConnector.defaultTemp,`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`});`
Refactor handleStream to LLM Classes (#685) 2024-02-07 17:15:14 +01:00			`completeText = await LLMConnector.handleStream(response, stream, {`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`uuid,`
			`sources,`
			`});`
			`}`

[FEAT] RLHF on response messages (#708) * WIP RLHF works on historical messages * refactor Actions component * completed RLHF up and down votes for chats * add defaults for HistoricalMessage params * refactor RLHF implmenation remove forwardRef on history items to prevent rerenders * remove dup id * Add rating to CSV output --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> 2024-02-13 20:33:05 +01:00			`const { chat } = await WorkspaceChats.new({`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`workspaceId: workspace.id,`
			`prompt: message,`
			`response: { text: completeText, sources, type: chatMode },`
Refactor LLM chat backend (#717) * refactor stream/chat/embed-stram to be a single execution logic path so that it is easier to maintain and build upon * no thread in sync chat since only api uses it adjust import locations 2024-02-14 21:32:07 +01:00			`threadId: thread?.id \|\| null,`
[FEAT] RLHF on response messages (#708) * WIP RLHF works on historical messages * refactor Actions component * completed RLHF up and down votes for chats * add defaults for HistoricalMessage params * refactor RLHF implmenation remove forwardRef on history items to prevent rerenders * remove dup id * Add rating to CSV output --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> 2024-02-13 20:33:05 +01:00			`user,`
			`});`

			`writeResponseChunk(response, {`
			`uuid,`
			`type: "finalizeResponseStream",`
			`close: true,`
			`error: false,`
			`chatId: chat.id,`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`});`
			`return;`
			`}`

			`module.exports = {`
Implement streaming for workspace chats via API (#604) 2024-01-16 19:37:46 +01:00			`VALID_CHAT_MODE,`
Enable chat streaming for LLMs (#354) * [Draft] Enable chat streaming for LLMs * stream only, move sendChat to deprecated * Update TODO deprecation comments update console output color for streaming disabled 2023-11-14 00:07:30 +01:00			`streamChatWithWorkspace,`
			`};`