// anything-llm/server/utils/chats/openaiCompatible.js

const { v4: uuidv4 } = require("uuid");
const { DocumentManager } = require("../DocumentManager");
const { WorkspaceChats } = require("../../models/workspaceChats");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses");
const { chatPrompt, sourceIdentifier } = require("./index");
const { PassThrough } = require("stream");
/**
 * Runs a full (non-streaming) chat against a workspace and returns the result
 * as an OpenAI-compatible `chat.completion` payload (see `formatJSON`).
 *
 * Flow:
 *  1. Refuse early in "query" mode when the workspace has no embeddings.
 *  2. Inject pinned documents in full, then similarity-search the vector DB
 *     (excluding pinned docs) for additional context.
 *  3. Refuse in "query" mode when no context chunks were found at all.
 *  4. Compress prompt + history + context to fit the model window, run the
 *     completion, and persist the exchange via WorkspaceChats.
 *
 * @param {object} args
 * @param {object} args.workspace - workspace record (id, slug, chat settings).
 * @param {string|null} [args.systemPrompt] - override for the workspace system prompt.
 * @param {object[]} [args.history] - prior chat messages to include.
 * @param {string|null} [args.prompt] - the user's message.
 * @param {number|null} [args.temperature] - override for sampling temperature.
 * @returns {Promise<object>} OpenAI-style chat completion object.
 */
async function chatSync({
  workspace,
  systemPrompt = null,
  history = [],
  prompt = null,
  temperature = null,
}) {
  const uuid = uuidv4();
  const chatMode = workspace?.chatMode ?? "chat";
  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
  // Single source of truth for the refusal text used by both early-exit branches.
  const queryRefusalResponse =
    workspace?.queryRefusalResponse ??
    "There is no relevant information in this workspace to answer your query.";

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: queryRefusalResponse,
        sources: [],
        type: chatMode,
      },
      include: false, // refusals are excluded from future chat history
    });
    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: null,
        textResponse: queryRefusalResponse,
      },
      { model: workspace.slug, finish_reason: "abort" }
    );
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  let contextTexts = [];
  let sources = [];
  let pinnedDocIdentifiers = [];

  // Pinned documents are always injected in full (bounded by the LLM context
  // window); their identifiers are passed to the similarity search below so
  // the same content is not retrieved twice.
  await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  })
    .pinnedDocs()
    .then((pinnedDocs) => {
      pinnedDocs.forEach((doc) => {
        const { pageContent, ...metadata } = doc;
        pinnedDocIdentifiers.push(sourceIdentifier(doc));
        contextTexts.push(doc.pageContent);
        sources.push({
          text:
            pageContent.slice(0, 1_000) +
            "...continued on in source document...",
          ...metadata,
        });
      });
    });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: prompt,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // Failed similarity search if it was run at all and failed.
  if (!!vectorSearchResults.message) {
    return formatJSON(
      {
        id: uuid,
        type: "abort",
        textResponse: null,
        sources: [],
        close: true,
        error: vectorSearchResults.message,
      },
      { model: workspace.slug, finish_reason: "abort" }
    );
  }

  // For OpenAI Compatible chats, we cannot do backfilling so we simply aggregate results here.
  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks are found from search, backfill, or pins - do not
  // let the LLM try to hallucinate a response or use general knowledge and exit early
  if (chatMode === "query" && contextTexts.length === 0) {
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: queryRefusalResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });
    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: null,
        textResponse: queryRefusalResponse,
      },
      { model: workspace.slug, finish_reason: "no_content" }
    );
  }

  // Compress & Assemble message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages({
    systemPrompt: systemPrompt ?? chatPrompt(workspace),
    userPrompt: prompt,
    contextTexts,
    chatHistory: history,
  });

  // Send the text completion.
  const textResponse = await LLMConnector.getChatCompletion(messages, {
    temperature:
      temperature ?? workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  });
  if (!textResponse) {
    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: "No text completion could be completed with this input.",
        textResponse: null,
      },
      { model: workspace.slug, finish_reason: "no_content" }
    );
  }

  const { chat } = await WorkspaceChats.new({
    workspaceId: workspace.id,
    prompt: String(prompt),
    response: { text: textResponse, sources, type: chatMode },
  });
  return formatJSON(
    {
      id: uuid,
      type: "textResponse",
      close: true,
      error: null,
      chatId: chat.id,
      textResponse,
      sources,
    },
    { model: workspace.slug, finish_reason: "stop" }
  );
}
/**
 * Runs a streaming chat against a workspace, writing OpenAI-style SSE chunks
 * directly to `response` (`data: {...}\n\n` lines).
 *
 * Mirrors `chatSync`'s retrieval flow (pinned docs + similarity search, with
 * query-mode refusals), but pipes the provider stream through a PassThrough
 * that rewrites each AnythingLLM-format chunk into OpenAI format before it
 * reaches the client.
 *
 * @param {object} args
 * @param {object} args.workspace - workspace record (id, slug, chat settings).
 * @param {object} args.response - HTTP response the SSE chunks are written to.
 * @param {string|null} [args.systemPrompt] - override for the workspace system prompt.
 * @param {object[]} [args.history] - prior chat messages to include.
 * @param {string|null} [args.prompt] - the user's message.
 * @param {number|null} [args.temperature] - override for sampling temperature.
 * @returns {Promise<void>} resolves after the final chunk is written.
 */
async function streamChat({
  workspace,
  response,
  systemPrompt = null,
  history = [],
  prompt = null,
  temperature = null,
}) {
  const uuid = uuidv4();
  const chatMode = workspace?.chatMode ?? "chat";
  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // We don't want to write a new method for every LLM to support openAI calls
  // via the `handleStreamResponseV2` method handler. So here we create a passthrough
  // that on writes to the main response, transforms the chunk to OpenAI format.
  // The chunk is coming in the format from `writeResponseChunk` but in the AnythingLLM
  // response chunk schema, so we here we mutate each chunk.
  const responseInterceptor = new PassThrough({});
  responseInterceptor.on("data", (chunk) => {
    try {
      // Strip the SSE `data: ` prefix to recover the JSON payload.
      // NOTE(review): assumes each `chunk` holds exactly one complete SSE
      // event - confirm `writeResponseChunk` never splits events across writes.
      const originalData = JSON.parse(chunk.toString().split("data: ")[1]);
      const modified = formatJSON(originalData, {
        chunked: true,
        model: workspace.slug,
      }); // rewrite to OpenAI format
      response.write(`data: ${JSON.stringify(modified)}\n\n`);
    } catch (e) {
      // Best-effort: a malformed chunk is logged and dropped, not fatal.
      console.error(e);
    }
  });

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false, // refusals are excluded from future chat history
    });
    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: null,
          textResponse,
        },
        { chunked: true, model: workspace.slug, finish_reason: "abort" }
      )
    );
    return;
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  let contextTexts = [];
  let sources = [];
  let pinnedDocIdentifiers = [];

  // Pinned documents are injected in full; their identifiers are passed to
  // the similarity search below so the same content is not retrieved twice.
  await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  })
    .pinnedDocs()
    .then((pinnedDocs) => {
      pinnedDocs.forEach((doc) => {
        const { pageContent, ...metadata } = doc;
        pinnedDocIdentifiers.push(sourceIdentifier(doc));
        contextTexts.push(doc.pageContent);
        sources.push({
          text:
            pageContent.slice(0, 1_000) +
            "...continued on in source document...",
          ...metadata,
        });
      });
    });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: prompt,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // Failed similarity search if it was run at all and failed.
  if (!!vectorSearchResults.message) {
    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "abort",
          textResponse: null,
          sources: [],
          close: true,
          error: vectorSearchResults.message,
        },
        { chunked: true, model: workspace.slug, finish_reason: "abort" }
      )
    );
    return;
  }

  // For OpenAI Compatible chats, we cannot do backfilling so we simply aggregate results here.
  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks are found from search, backfill, or pins - do not
  // let the LLM try to hallucinate a response or use general knowledge and exit early
  if (chatMode === "query" && contextTexts.length === 0) {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: prompt,
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });
    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: null,
          textResponse,
        },
        { chunked: true, model: workspace.slug, finish_reason: "no_content" }
      )
    );
    return;
  }

  // Compress & Assemble message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages({
    systemPrompt: systemPrompt ?? chatPrompt(workspace),
    userPrompt: prompt,
    contextTexts,
    chatHistory: history,
  });

  // Providers that cannot stream abort here rather than faking a stream.
  if (!LLMConnector.streamingEnabled()) {
    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: "Streaming is not available for the connected LLM Provider",
          textResponse: null,
        },
        {
          chunked: true,
          model: workspace.slug,
          finish_reason: "streaming_disabled",
        }
      )
    );
    return;
  }

  const stream = await LLMConnector.streamGetChatCompletion(messages, {
    temperature:
      temperature ?? workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  });
  // Chunks are written to the interceptor (not `response` directly) so each
  // one is rewritten to OpenAI format by the handler above before sending.
  const completeText = await LLMConnector.handleStream(
    responseInterceptor,
    stream,
    {
      uuid,
      sources,
    }
  );

  // Persist the exchange only when the stream produced any text, then emit
  // the final chunk (with chatId when saved) to close the stream client-side.
  if (completeText?.length > 0) {
    const { chat } = await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: prompt,
      response: { text: completeText, sources, type: chatMode },
    });
    writeResponseChunk(
      response,
      formatJSON(
        {
          uuid,
          type: "finalizeResponseStream",
          close: true,
          error: false,
          chatId: chat.id,
          textResponse: "",
        },
        { chunked: true, model: workspace.slug, finish_reason: "stop" }
      )
    );
    return;
  }

  writeResponseChunk(
    response,
    formatJSON(
      {
        uuid,
        type: "finalizeResponseStream",
        close: true,
        error: false,
        textResponse: "",
      },
      { chunked: true, model: workspace.slug, finish_reason: "stop" }
    )
  );
  return;
}
/**
 * Formats an internal AnythingLLM chat result into an OpenAI-compatible
 * chat completion (or streaming chunk) payload.
 *
 * @param {object} chat - internal chat result; reads `uuid`/`id` for the
 *   response id and `textResponse` for the assistant content. All other
 *   fields (sources, close, error, ...) are intentionally dropped.
 * @param {object} options
 * @param {boolean} [options.chunked=false] - when true, emit a streaming
 *   chunk shape (`delta`) instead of a full `message`.
 * @param {string} options.model - model name echoed to the client (the
 *   workspace slug in this module).
 * @param {string|null} [options.finish_reason=null]
 * @returns {object} OpenAI-style chat completion object.
 */
function formatJSON(chat, { chunked = false, model, finish_reason = null }) {
  return {
    id: chat.uuid ?? chat.id,
    // Per the OpenAI API spec, streamed chunks are `chat.completion.chunk`;
    // only non-streaming responses are `chat.completion`.
    object: chunked ? "chat.completion.chunk" : "chat.completion",
    // OpenAI reports `created` as a Unix timestamp in seconds, not ms.
    created: Math.floor(Date.now() / 1000),
    model,
    choices: [
      {
        [chunked ? "delta" : "message"]: {
          role: "assistant",
          content: chat.textResponse,
        },
        logprobs: null,
        finish_reason,
      },
    ],
  };
}
// Public surface of this module: OpenAI-compatible chat handlers used by the
// workspace API endpoints.
const OpenAICompatibleChat = {
  chatSync,
  streamChat,
};
module.exports.OpenAICompatibleChat = OpenAICompatibleChat;