Stop generation button during stream-response (#892)

* Stop generation button during stream-response

* add custom stop icon

* add stop to thread chats
Timothy Carambat 2024-03-12 15:21:27 -07:00 committed by GitHub
parent fd2b13017e
commit 0e46a11cb6
16 changed files with 236 additions and 27 deletions
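Taken together, the changes wire one browser event from the new stop button through the frontend stream models and into every LLM provider's stream handler on the server. A rough sketch of the flow, assembled from the diffs below (illustrative only, not a literal excerpt from any one file):

// 1. Frontend: StopGenerationButton dispatches the shared event.
window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));

// 2. Workspace / WorkspaceThread stream models: abort the in-flight request and
//    push a synthetic "stopGeneration" chat so handleChat can reset the UI.
window.addEventListener(ABORT_STREAM_EVENT, () => {
  ctrl.abort(); // the AbortController handed to fetchEventSource
  handleChat({ id: v4(), type: "stopGeneration" });
});

// 3. Server: each provider watches for the HTTP response closing early and
//    resolves its stream promise with whatever text was generated so far.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);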

View File

@ -0,0 +1,50 @@
import { ABORT_STREAM_EVENT } from "@/utils/chat";
import { Tooltip } from "react-tooltip";
export default function StopGenerationButton() {
function emitHaltEvent() {
window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
}
return (
<>
<button
type="button"
onClick={emitHaltEvent}
data-tooltip-id="stop-generation-button"
data-tooltip-content="Stop generating response"
className="border-none text-white/60 cursor-pointer group"
>
<svg
width="28"
height="28"
viewBox="0 0 28 28"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<circle
className="group-hover:stroke-[#46C8FF] stroke-white"
cx="10"
cy="10.562"
r="9"
strokeWidth="2"
/>
<rect
className="group-hover:fill-[#46C8FF] fill-white"
x="6.3999"
y="6.96204"
width="7.2"
height="7.2"
rx="2"
/>
</svg>
</button>
<Tooltip
id="stop-generation-button"
place="bottom"
delayShow={300}
className="tooltip !text-xs invert"
/>
</>
);
}
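While a response is streaming, this component simply takes the submit button's place. A minimal sketch of the conditional render, assuming the `buttonDisabled` flag used by PromptInput (the full change appears in the PromptInput diff further down):

{buttonDisabled ? (
  <StopGenerationButton />
) : (
  <button type="submit" className="...">
    <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
  </button>
)}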

View File

@ -0,0 +1,4 @@
<svg width="21" height="21" viewBox="0 0 21 21" fill="none" xmlns="http://www.w3.org/2000/svg">
<circle cx="10.8984" cy="10.562" r="9" stroke="white" stroke-width="2"/>
<rect x="7.29846" y="6.96204" width="7.2" height="7.2" rx="2" fill="white"/>
</svg>


View File

@ -1,4 +1,3 @@
- import { CircleNotch, PaperPlaneRight } from "@phosphor-icons/react";
import React, { useState, useRef } from "react";
import SlashCommandsButton, {
SlashCommands,
@ -6,6 +5,8 @@ import SlashCommandsButton, {
} from "./SlashCommands";
import { isMobile } from "react-device-detect";
import debounce from "lodash.debounce";
+ import { PaperPlaneRight } from "@phosphor-icons/react";
+ import StopGenerationButton from "./StopGenerationButton";
export default function PromptInput({
workspace,
@ -83,19 +84,18 @@ export default function PromptInput({
className="cursor-text max-h-[100px] md:min-h-[40px] mx-2 md:mx-0 py-2 w-full text-[16px] md:text-md text-white bg-transparent placeholder:text-white/60 resize-none active:outline-none focus:outline-none flex-grow"
placeholder={"Send a message"}
/>
- <button
-   ref={formRef}
-   type="submit"
-   disabled={buttonDisabled}
-   className="inline-flex justify-center rounded-2xl cursor-pointer text-white/60 hover:text-white group ml-4"
- >
-   {buttonDisabled ? (
-     <CircleNotch className="w-6 h-6 animate-spin" />
-   ) : (
-     <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
-   )}
-   <span className="sr-only">Send message</span>
- </button>
+ {buttonDisabled ? (
+   <StopGenerationButton />
+ ) : (
+   <button
+     ref={formRef}
+     type="submit"
+     className="inline-flex justify-center rounded-2xl cursor-pointer text-white/60 hover:text-white group ml-4"
+   >
+     <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
+     <span className="sr-only">Send message</span>
+   </button>
+ )}
</div>
<div className="flex justify-between py-3.5">
<div className="flex gap-x-2">

View File

@ -68,11 +68,7 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
const remHistory = chatHistory.length > 0 ? chatHistory.slice(0, -1) : [];
var _chatHistory = [...remHistory];
- if (!promptMessage || !promptMessage?.userMessage) {
-   setLoadingResponse(false);
-   return false;
- }
+ if (!promptMessage || !promptMessage?.userMessage) return false;
if (!!threadSlug) {
await Workspace.threads.streamChat(
{ workspaceSlug: workspace.slug, threadSlug },

View File

@ -3,6 +3,7 @@ import { baseHeaders } from "@/utils/request";
import { fetchEventSource } from "@microsoft/fetch-event-source";
import WorkspaceThread from "@/models/workspaceThread";
import { v4 } from "uuid";
import { ABORT_STREAM_EVENT } from "@/utils/chat";
const Workspace = {
new: async function (data = {}) {
@ -75,6 +76,16 @@ const Workspace = {
},
streamChat: async function ({ slug }, message, handleChat) {
const ctrl = new AbortController();
// Listen for the ABORT_STREAM_EVENT emitted by the client to abort the
// streaming response early. On abort we send a special `stopGeneration`
// event that resets the UI so another message can be sent.
// The backend abort handling is done in each LLM's handleStreamResponse.
window.addEventListener(ABORT_STREAM_EVENT, () => {
ctrl.abort();
handleChat({ id: v4(), type: "stopGeneration" });
});
await fetchEventSource(`${API_BASE}/workspace/${slug}/stream-chat`, {
method: "POST",
body: JSON.stringify({ message }),
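A design note on the listener above: it is added to `window` on every `streamChat` call and never removed, so handlers accumulate across messages (the same applies to the thread variant below). If that ever becomes a concern, a hypothetical self-cleaning variant, not part of this commit, could look like:

const handleAbort = () => {
  ctrl.abort();
  handleChat({ id: v4(), type: "stopGeneration" });
};
// Fire at most once, and drop the handler once the stream settles.
window.addEventListener(ABORT_STREAM_EVENT, handleAbort, { once: true });
// ...after fetchEventSource resolves or throws:
window.removeEventListener(ABORT_STREAM_EVENT, handleAbort);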

View File

@ -1,3 +1,4 @@
import { ABORT_STREAM_EVENT } from "@/utils/chat";
import { API_BASE } from "@/utils/constants";
import { baseHeaders } from "@/utils/request";
import { fetchEventSource } from "@microsoft/fetch-event-source";
@ -80,6 +81,16 @@ const WorkspaceThread = {
handleChat
) {
const ctrl = new AbortController();
// Listen for the ABORT_STREAM_EVENT emitted by the client to abort the
// streaming response early. On abort we send a special `stopGeneration`
// event that resets the UI so another message can be sent.
// The backend abort handling is done in each LLM's handleStreamResponse.
window.addEventListener(ABORT_STREAM_EVENT, () => {
ctrl.abort();
handleChat({ id: v4(), type: "stopGeneration" });
});
await fetchEventSource(
`${API_BASE}/workspace/${workspaceSlug}/thread/${threadSlug}/stream-chat`,
{

View File

@ -1,3 +1,5 @@
export const ABORT_STREAM_EVENT = "abort-chat-stream";
// For handling of chat responses in the frontend by their various types.
export default function handleChat(
chatResult,
@ -108,6 +110,22 @@ export default function handleChat(
_chatHistory[chatIdx] = updatedHistory;
}
setChatHistory([..._chatHistory]);
setLoadingResponse(false);
} else if (type === "stopGeneration") {
const chatIdx = _chatHistory.length - 1;
const existingHistory = { ..._chatHistory[chatIdx] };
const updatedHistory = {
...existingHistory,
sources: [],
closed: true,
error: null,
animate: false,
pending: false,
};
_chatHistory[chatIdx] = updatedHistory;
setChatHistory([..._chatHistory]);
setLoadingResponse(false);
}
}
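For orientation, the `stopGeneration` branch above is only ever driven by the synthetic chat emitted from the stream models; a small illustration of the input and its effect, with `lastEntry` as a stand-in name for the trailing history item (not a literal excerpt):

// Pushed by Workspace.streamChat / WorkspaceThread.streamChat on abort:
const stopChat = { id: v4(), type: "stopGeneration" };

// handleChat(stopChat, ...) then freezes the trailing history entry so the UI
// stops animating and the prompt can be used again:
const frozen = {
  ...lastEntry,
  sources: [],
  closed: true,
  error: null,
  animate: false,
  pending: false,
};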

View File

@ -1,6 +1,9 @@
const { v4 } = require("uuid");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class AnthropicLLM {
constructor(embedder = null, modelPreference = null) {
if (!process.env.ANTHROPIC_API_KEY)
@ -150,6 +153,13 @@ class AnthropicLLM {
let fullText = "";
const { uuid = v4(), sources = [] } = responseProps;
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.on("streamEvent", (message) => {
const data = message;
if (
@ -181,6 +191,7 @@ class AnthropicLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
});

View File

@ -1,6 +1,9 @@
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class AzureOpenAiLLM {
constructor(embedder = null, _modelPreference = null) {
@ -174,6 +177,14 @@ class AzureOpenAiLLM {
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const event of stream) {
for (const choice of event.choices) {
const delta = choice.delta?.content;
@ -198,6 +209,7 @@ class AzureOpenAiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
});
}

View File

@ -1,5 +1,8 @@
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class GeminiLLM {
constructor(embedder = null, modelPreference = null) {
@ -198,6 +201,14 @@ class GeminiLLM {
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
fullText += chunk.text();
writeResponseChunk(response, {
@ -218,6 +229,7 @@ class GeminiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
});
}

View File

@ -1,7 +1,10 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class HuggingFaceLLM {
constructor(embedder = null, _modelPreference = null) {
@ -172,6 +175,14 @@ class HuggingFaceLLM {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
@ -218,6 +229,7 @@ class HuggingFaceLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let error = null;
@ -241,6 +253,7 @@ class HuggingFaceLLM {
close: true,
error,
});
response.removeListener("close", handleAbort);
resolve("");
return;
}
@ -266,6 +279,7 @@ class HuggingFaceLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}

View File

@ -2,7 +2,10 @@ const fs = require("fs");
const path = require("path");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
// Docs: https://api.js.langchain.com/classes/chat_models_llama_cpp.ChatLlamaCpp.html
const ChatLlamaCpp = (...args) =>
@ -176,6 +179,14 @@ class NativeLLM {
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
if (chunk === undefined)
throw new Error(
@ -202,6 +213,7 @@ class NativeLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
});
}

View File

@ -1,6 +1,9 @@
const { chatPrompt } = require("../../chats");
const { StringOutputParser } = require("langchain/schema/output_parser");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
class OllamaAILLM {
@ -180,8 +183,16 @@ class OllamaAILLM {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
try {
for await (const chunk of stream) {
if (chunk === undefined)
throw new Error(
@ -210,6 +221,7 @@ class OllamaAILLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} catch (error) {
writeResponseChunk(response, {
@ -222,6 +234,7 @@ class OllamaAILLM {
error?.cause ?? error.message
}`,
});
response.removeListener("close", handleAbort);
}
});
}

View File

@ -1,7 +1,10 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { v4: uuidv4 } = require("uuid");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
function openRouterModels() {
const { MODELS } = require("./models.js");
@ -195,6 +198,13 @@ class OpenRouterLLM {
let chunk = "";
let lastChunkTime = null; // null when first token is still not received.
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
// NOTICE: Not all OpenRouter models will return a stop reason
// which keeps the connection open and so the model never finalizes the stream
// like the traditional OpenAI response schema does. So in the case the response stream
@ -220,6 +230,7 @@ class OpenRouterLLM {
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}, 500);
@ -269,6 +280,7 @@ class OpenRouterLLM {
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
@ -305,6 +317,7 @@ class OpenRouterLLM {
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}

View File

@ -1,5 +1,8 @@
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
function togetherAiModels() {
const { MODELS } = require("./models.js");
@ -185,6 +188,14 @@ class TogetherAiLLM {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
@ -230,6 +241,7 @@ class TogetherAiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
@ -263,6 +275,7 @@ class TogetherAiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}

View File

@ -1,6 +1,14 @@
const { v4: uuidv4 } = require("uuid");
const moment = require("moment");
function clientAbortedHandler(resolve, fullText) {
console.log(
"\x1b[43m\x1b[34m[STREAM ABORTED]\x1b[0m Client requested to abort stream. Exiting LLM stream handler early."
);
resolve(fullText);
return;
}
// The default way to handle a stream response. Functions best with OpenAI.
// Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
function handleDefaultStreamResponse(response, stream, responseProps) {
@ -9,6 +17,14 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
@ -52,6 +68,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
@ -85,6 +102,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
@ -141,4 +159,5 @@ module.exports = {
convertToChatHistory,
convertToPromptHistory,
writeResponseChunk,
clientAbortedHandler,
};
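Zooming out, every provider touched by this commit wraps its stream loop in the same abort lifecycle around `clientAbortedHandler`; condensed, the pattern looks like this (illustrative sketch with a generic function name, not a literal excerpt from any one provider):

function handleStream(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;
  return new Promise((resolve) => {
    let fullText = "";
    // If the client disconnects mid-stream, resolve with whatever we have so far.
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    // ...consume the provider's `stream` here, appending to fullText and calling
    // writeResponseChunk(response, { uuid, sources, ... }) for each chunk...

    // On a normal finish, stop watching for the abort and resolve.
    response.removeListener("close", handleAbort);
    resolve(fullText);
  });
}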