const { v4: uuidv4 } = require("uuid");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { chatPrompt, convertToPromptHistory } = require(".");
const { writeResponseChunk } = require("./stream");
const { EmbedChats } = require("../../models/embedChats");
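
/**
 * Stream a chat response back to an embedded chat-widget session.
 * Applies any permitted prompt/model/temperature overrides, runs the
 * provider's moderation check, retrieves context from the workspace's
 * vector namespace, and records the exchange for the session.
 */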
async function streamChatWithForEmbed(
  response,
  /** @type {import("@prisma/client").embed_configs & {workspace?: import("@prisma/client").workspaces}} */
  embed,
  /** @type {String} */
  message,
  /** @type {String} */
  sessionId,
  { promptOverride, modelOverride, temperatureOverride }
) {
  const chatMode = embed.chat_mode;
  const chatModel = embed.allow_model_override ? modelOverride : null;

  // If there are overrides in the request & they are permitted, override the default workspace ref information.
  if (embed.allow_prompt_override)
    embed.workspace.openAiPrompt = promptOverride;
  if (embed.allow_temperature_override)
    embed.workspace.openAiTemp = parseFloat(temperatureOverride);

  const uuid = uuidv4();
  const LLMConnector = getLLMProvider(chatModel ?? embed.workspace?.chatModel);
  const VectorDb = getVectorDbClass();
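  // Abort early if the provider's moderation check flags the message.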
  const { safe, reasons = [] } = await LLMConnector.isSafe(message);
  if (!safe) {
    writeResponseChunk(response, {
      id: uuid,
      type: "abort",
      textResponse: null,
      sources: [],
      close: true,
      error: `This message was moderated and will not be allowed. Violations for ${reasons.join(
        ", "
      )} found.`,
    });
    return;
  }
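
  // Check whether the workspace namespace has any embeddings before
  // attempting retrieval-augmented chat.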
  const messageLimit = 20;
  const hasVectorizedSpace = await VectorDb.hasNamespace(embed.workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(embed.workspace.slug);
  if (!hasVectorizedSpace || embeddingsCount === 0) {
    if (chatMode === "query") {
      writeResponseChunk(response, {
        id: uuid,
        type: "textResponse",
        textResponse:
          "I do not have enough information to answer that. Try another question.",
        sources: [],
        close: true,
        error: null,
      });
      return;
    }

    // If there are no embeddings - chat like a normal LLM chat interface.
    return await streamEmptyEmbeddingChat({
      response,
      uuid,
      sessionId,
      message,
      embed,
      messageLimit,
      LLMConnector,
    });
  }
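
  // Load recent chat history for this session and run a similarity search
  // against the workspace's vector namespace to build prompt context.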
  let completeText;
  const { rawHistory, chatHistory } = await recentEmbedChatHistory(
    sessionId,
    embed,
    messageLimit,
    chatMode
  );
  const {
    contextTexts = [],
    sources = [],
    message: error,
  } = await VectorDb.performSimilaritySearch({
    namespace: embed.workspace.slug,
    input: message,
    LLMConnector,
    similarityThreshold: embed.workspace?.similarityThreshold,
    topN: embed.workspace?.topN,
  });

  // Failed similarity search.
  if (!!error) {
    writeResponseChunk(response, {
      id: uuid,
      type: "abort",
      textResponse: null,
      sources: [],
      close: true,
      error: "Failed to connect to vector database provider.",
    });
    return;
  }

  // If in query mode and no sources are found, do not
  // let the LLM try to hallucinate a response or use general knowledge.
  if (chatMode === "query" && sources.length === 0) {
    writeResponseChunk(response, {
      id: uuid,
      type: "textResponse",
      textResponse:
        "There is no relevant information in this workspace to answer your query.",
      sources: [],
      close: true,
      error: null,
    });
    return;
  }

  // Compress message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages(
    {
      systemPrompt: chatPrompt(embed.workspace),
      userPrompt: message,
      contextTexts,
      chatHistory,
    },
    rawHistory
  );

  // If streaming is not explicitly enabled for the connector,
  // wait for the full response and send it back as a single chunk.
  if (LLMConnector.streamingEnabled() !== true) {
    console.log(
      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
    );
    completeText = await LLMConnector.getChatCompletion(messages, {
      temperature: embed.workspace?.openAiTemp ?? LLMConnector.defaultTemp,
    });
    writeResponseChunk(response, {
      uuid,
      sources: [],
      type: "textResponseChunk",
      textResponse: completeText,
      close: true,
      error: false,
    });
  } else {
    const stream = await LLMConnector.streamGetChatCompletion(messages, {
      temperature: embed.workspace?.openAiTemp ?? LLMConnector.defaultTemp,
    });
    completeText = await LLMConnector.handleStream(response, stream, {
      uuid,
      sources: [],
    });
  }
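
  // Persist the exchange so it appears in this embed session's chat history.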
  await EmbedChats.new({
    embedId: embed.id,
    prompt: message,
    response: { text: completeText, type: chatMode },
    connection_information: response.locals.connection
      ? { ...response.locals.connection }
      : {},
    sessionId,
  });
  return;
}

// On query we don't return message history. All other chat modes, and when
// chatting with no embeddings, we return history.
async function recentEmbedChatHistory(
  sessionId,
  embed,
  messageLimit = 20,
  chatMode = null
) {
  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
  const rawHistory = (
    await EmbedChats.forEmbedByUser(embed.id, sessionId, messageLimit, {
      id: "desc",
    })
  ).reverse();
  return { rawHistory, chatHistory: convertToPromptHistory(rawHistory) };
}
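
// Chat as a plain LLM session (no retrieved context) when the workspace has
// no embeddings. The exchange is still recorded against the embed session.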
async function streamEmptyEmbeddingChat({
  response,
  uuid,
  sessionId,
  message,
  embed,
  messageLimit,
  LLMConnector,
}) {
  let completeText;
  const { rawHistory, chatHistory } = await recentEmbedChatHistory(
    sessionId,
    embed,
    messageLimit
  );
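
  // Stream the reply when the connector supports it; otherwise wait for the
  // full completion and send it back as a single chunk.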
  if (LLMConnector.streamingEnabled() !== true) {
    console.log(
      `\x1b[31m[STREAMING DISABLED]\x1b[0m Streaming is not available for ${LLMConnector.constructor.name}. Will use regular chat method.`
    );
    completeText = await LLMConnector.sendChat(
      chatHistory,
      message,
      embed.workspace,
      rawHistory
    );
    writeResponseChunk(response, {
      uuid,
      type: "textResponseChunk",
      textResponse: completeText,
      sources: [],
      close: true,
      error: false,
    });
  } else {
    const stream = await LLMConnector.streamChat(
      chatHistory,
      message,
      embed.workspace,
      rawHistory
    );
    completeText = await LLMConnector.handleStream(response, stream, {
      uuid,
      sources: [],
    });
  }

  await EmbedChats.new({
    embedId: embed.id,
    prompt: message,
    response: { text: completeText, type: "chat" },
    connection_information: response.locals.connection
      ? { ...response.locals.connection }
      : {},
    sessionId,
  });
  return;
}

module.exports = {
  streamChatWithForEmbed,
};