Stop generation button during stream-response (#892)

* Stop generation button during stream-response

* add custom stop icon

* add stop to thread chats
Timothy Carambat 2024-03-12 15:21:27 -07:00 committed by GitHub
parent fd2b13017e
commit 0e46a11cb6
16 changed files with 236 additions and 27 deletions
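
The change is split between the React client and the server-side LLM stream handlers. A rough sketch of the client half, condensed from the diffs below (the listenForAbort helper is illustrative only; the real listener is registered inline in the Workspace and WorkspaceThread models):

// Condensed, illustrative view of the client-side abort flow added here.
const ABORT_STREAM_EVENT = "abort-chat-stream";

// UI side: the stop button just broadcasts the abort request.
function emitHaltEvent() {
  window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
}

// Model side: each streamChat call cancels its in-flight fetchEventSource
// request and hands a synthetic "stopGeneration" event to handleChat so the
// UI can finalize the partial message and unlock the prompt input.
function listenForAbort(ctrl /* AbortController */, handleChat) {
  window.addEventListener(ABORT_STREAM_EVENT, () => {
    ctrl.abort();
    // The real code uses uuid's v4() for the id; randomUUID keeps this sketch self-contained.
    handleChat({ id: crypto.randomUUID(), type: "stopGeneration" });
  });
}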

View File

@@ -0,0 +1,50 @@
+import { ABORT_STREAM_EVENT } from "@/utils/chat";
+import { Tooltip } from "react-tooltip";
+
+export default function StopGenerationButton() {
+  function emitHaltEvent() {
+    window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
+  }
+
+  return (
+    <>
+      <button
+        type="button"
+        onClick={emitHaltEvent}
+        data-tooltip-id="stop-generation-button"
+        data-tooltip-content="Stop generating response"
+        className="border-none text-white/60 cursor-pointer group"
+      >
+        <svg
+          width="28"
+          height="28"
+          viewBox="0 0 28 28"
+          fill="none"
+          xmlns="http://www.w3.org/2000/svg"
+        >
+          <circle
+            className="group-hover:stroke-[#46C8FF] stroke-white"
+            cx="10"
+            cy="10.562"
+            r="9"
+            stroke-width="2"
+          />
+          <rect
+            className="group-hover:fill-[#46C8FF] fill-white"
+            x="6.3999"
+            y="6.96204"
+            width="7.2"
+            height="7.2"
+            rx="2"
+          />
+        </svg>
+      </button>
+      <Tooltip
+        id="stop-generation-button"
+        place="bottom"
+        delayShow={300}
+        className="tooltip !text-xs invert"
+      />
+    </>
+  );
+}
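
Because the button communicates through a window-level CustomEvent instead of props or context, any other piece of UI could trigger the same abort. A hypothetical example, not part of this commit, that wires the Escape key to the same event:

// Hypothetical hook (not in this commit): pressing Escape aborts the stream
// exactly like clicking StopGenerationButton.
import { useEffect } from "react";
import { ABORT_STREAM_EVENT } from "@/utils/chat";

export function useEscapeToStopGeneration() {
  useEffect(() => {
    const onKeyDown = (e) => {
      if (e.key === "Escape")
        window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
    };
    window.addEventListener("keydown", onKeyDown);
    return () => window.removeEventListener("keydown", onKeyDown);
  }, []);
}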

View File

@@ -0,0 +1,4 @@
+<svg width="21" height="21" viewBox="0 0 21 21" fill="none" xmlns="http://www.w3.org/2000/svg">
+<circle cx="10.8984" cy="10.562" r="9" stroke="white" stroke-width="2"/>
+<rect x="7.29846" y="6.96204" width="7.2" height="7.2" rx="2" fill="white"/>
+</svg>


View File

@@ -1,4 +1,3 @@
-import { CircleNotch, PaperPlaneRight } from "@phosphor-icons/react";
 import React, { useState, useRef } from "react";
 import SlashCommandsButton, {
   SlashCommands,
@@ -6,6 +5,8 @@ import SlashCommandsButton, {
 } from "./SlashCommands";
 import { isMobile } from "react-device-detect";
 import debounce from "lodash.debounce";
+import { PaperPlaneRight } from "@phosphor-icons/react";
+import StopGenerationButton from "./StopGenerationButton";

 export default function PromptInput({
   workspace,
@@ -83,19 +84,18 @@ export default function PromptInput({
             className="cursor-text max-h-[100px] md:min-h-[40px] mx-2 md:mx-0 py-2 w-full text-[16px] md:text-md text-white bg-transparent placeholder:text-white/60 resize-none active:outline-none focus:outline-none flex-grow"
             placeholder={"Send a message"}
           />
+          {buttonDisabled ? (
+            <StopGenerationButton />
+          ) : (
             <button
               ref={formRef}
               type="submit"
-              disabled={buttonDisabled}
               className="inline-flex justify-center rounded-2xl cursor-pointer text-white/60 hover:text-white group ml-4"
             >
-              {buttonDisabled ? (
-                <CircleNotch className="w-6 h-6 animate-spin" />
-              ) : (
               <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
-              )}
               <span className="sr-only">Send message</span>
             </button>
+          )}
         </div>
         <div className="flex justify-between py-3.5">
           <div className="flex gap-x-2">

View File

@@ -68,11 +68,7 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
     const remHistory = chatHistory.length > 0 ? chatHistory.slice(0, -1) : [];
     var _chatHistory = [...remHistory];
-    if (!promptMessage || !promptMessage?.userMessage) {
-      setLoadingResponse(false);
-      return false;
-    }
+    if (!promptMessage || !promptMessage?.userMessage) return false;

     if (!!threadSlug) {
       await Workspace.threads.streamChat(
         { workspaceSlug: workspace.slug, threadSlug },

View File

@@ -3,6 +3,7 @@ import { baseHeaders } from "@/utils/request";
 import { fetchEventSource } from "@microsoft/fetch-event-source";
 import WorkspaceThread from "@/models/workspaceThread";
 import { v4 } from "uuid";
+import { ABORT_STREAM_EVENT } from "@/utils/chat";

 const Workspace = {
   new: async function (data = {}) {
@@ -75,6 +76,16 @@
   },
   streamChat: async function ({ slug }, message, handleChat) {
     const ctrl = new AbortController();
+
+    // Listen for the ABORT_STREAM_EVENT key to be emitted by the client
+    // to early abort the streaming response. On abort we send a special `stopGeneration`
+    // event to be handled which resets the UI for us to be able to send another message.
+    // The backend response abort handling is done in each LLM's handleStreamResponse.
+    window.addEventListener(ABORT_STREAM_EVENT, () => {
+      ctrl.abort();
+      handleChat({ id: v4(), type: "stopGeneration" });
+    });
+
     await fetchEventSource(`${API_BASE}/workspace/${slug}/stream-chat`, {
       method: "POST",
       body: JSON.stringify({ message }),

View File

@@ -1,3 +1,4 @@
+import { ABORT_STREAM_EVENT } from "@/utils/chat";
 import { API_BASE } from "@/utils/constants";
 import { baseHeaders } from "@/utils/request";
 import { fetchEventSource } from "@microsoft/fetch-event-source";
@@ -80,6 +81,16 @@
     handleChat
   ) {
     const ctrl = new AbortController();
+
+    // Listen for the ABORT_STREAM_EVENT key to be emitted by the client
+    // to early abort the streaming response. On abort we send a special `stopGeneration`
+    // event to be handled which resets the UI for us to be able to send another message.
+    // The backend response abort handling is done in each LLM's handleStreamResponse.
+    window.addEventListener(ABORT_STREAM_EVENT, () => {
+      ctrl.abort();
+      handleChat({ id: v4(), type: "stopGeneration" });
+    });
+
     await fetchEventSource(
       `${API_BASE}/workspace/${workspaceSlug}/thread/${threadSlug}/stream-chat`,
       {

View File

@@ -1,3 +1,5 @@
+export const ABORT_STREAM_EVENT = "abort-chat-stream";
+
 // For handling of chat responses in the frontend by their various types.
 export default function handleChat(
   chatResult,
@@ -108,6 +110,22 @@ export default function handleChat(
       _chatHistory[chatIdx] = updatedHistory;
     }
     setChatHistory([..._chatHistory]);
+    setLoadingResponse(false);
+  } else if (type === "stopGeneration") {
+    const chatIdx = _chatHistory.length - 1;
+    const existingHistory = { ..._chatHistory[chatIdx] };
+    const updatedHistory = {
+      ...existingHistory,
+      sources: [],
+      closed: true,
+      error: null,
+      animate: false,
+      pending: false,
+    };
+    _chatHistory[chatIdx] = updatedHistory;
+    setChatHistory([..._chatHistory]);
+    setLoadingResponse(false);
   }
 }
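
For reference, the new stopGeneration branch keeps whatever text has already streamed in and simply finalizes the last history entry. A small illustration (the entry shape is assumed from the surrounding handler):

// Assumed shape of the in-flight message while streaming.
const history = [
  { role: "user", content: "Explain streams" },
  { role: "assistant", content: "Streams let you...", pending: true, animate: true, closed: false },
];

// What the stopGeneration branch does to it:
const chatIdx = history.length - 1;
history[chatIdx] = {
  ...history[chatIdx],
  sources: [],
  closed: true,
  error: null,
  animate: false,
  pending: false,
};
// The partial text is preserved; the bubble stops animating and
// setLoadingResponse(false) swaps the stop button back to the send button.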

View File

@@ -1,6 +1,9 @@
 const { v4 } = require("uuid");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class AnthropicLLM {
   constructor(embedder = null, modelPreference = null) {
     if (!process.env.ANTHROPIC_API_KEY)
@@ -150,6 +153,13 @@ class AnthropicLLM {
       let fullText = "";
       const { uuid = v4(), sources = [] } = responseProps;
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       stream.on("streamEvent", (message) => {
         const data = message;
         if (
@@ -181,6 +191,7 @@
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     });

View File

@@ -1,6 +1,9 @@
 const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class AzureOpenAiLLM {
   constructor(embedder = null, _modelPreference = null) {
@@ -174,6 +177,14 @@ class AzureOpenAiLLM {
     return new Promise(async (resolve) => {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       for await (const event of stream) {
         for (const choice of event.choices) {
           const delta = choice.delta?.content;
@@ -198,6 +209,7 @@
         close: true,
         error: false,
       });
+      response.removeListener("close", handleAbort);
       resolve(fullText);
     });
   }

View File

@@ -1,5 +1,8 @@
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class GeminiLLM {
   constructor(embedder = null, modelPreference = null) {
@@ -198,6 +201,14 @@ class GeminiLLM {
     return new Promise(async (resolve) => {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       for await (const chunk of stream) {
         fullText += chunk.text();
         writeResponseChunk(response, {
@@ -218,6 +229,7 @@
         close: true,
         error: false,
       });
+      response.removeListener("close", handleAbort);
       resolve(fullText);
     });
   }

View File

@@ -1,7 +1,10 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class HuggingFaceLLM {
   constructor(embedder = null, _modelPreference = null) {
@@ -172,6 +175,14 @@ class HuggingFaceLLM {
     return new Promise((resolve) => {
       let fullText = "";
       let chunk = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       stream.data.on("data", (data) => {
         const lines = data
           ?.toString()
@@ -218,6 +229,7 @@
             close: true,
             error: false,
           });
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         } else {
           let error = null;
@@ -241,6 +253,7 @@
             close: true,
             error,
           });
+          response.removeListener("close", handleAbort);
           resolve("");
           return;
         }
@@ -266,6 +279,7 @@
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     }

View File

@@ -2,7 +2,10 @@ const fs = require("fs");
 const path = require("path");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 // Docs: https://api.js.langchain.com/classes/chat_models_llama_cpp.ChatLlamaCpp.html
 const ChatLlamaCpp = (...args) =>
@@ -176,6 +179,14 @@ class NativeLLM {
     return new Promise(async (resolve) => {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       for await (const chunk of stream) {
         if (chunk === undefined)
           throw new Error(
@@ -202,6 +213,7 @@
         close: true,
         error: false,
       });
+      response.removeListener("close", handleAbort);
       resolve(fullText);
     });
   }

View File

@@ -1,6 +1,9 @@
 const { chatPrompt } = require("../../chats");
 const { StringOutputParser } = require("langchain/schema/output_parser");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 class OllamaAILLM {
@@ -180,8 +183,16 @@ class OllamaAILLM {
     const { uuid = uuidv4(), sources = [] } = responseProps;

     return new Promise(async (resolve) => {
-      try {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
+      try {
         for await (const chunk of stream) {
           if (chunk === undefined)
             throw new Error(
@@ -210,6 +221,7 @@
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       } catch (error) {
         writeResponseChunk(response, {
@@ -222,6 +234,7 @@
            error?.cause ?? error.message
          }`,
        });
+        response.removeListener("close", handleAbort);
       }
     });
   }

View File

@@ -1,7 +1,10 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { chatPrompt } = require("../../chats");
 const { v4: uuidv4 } = require("uuid");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 function openRouterModels() {
   const { MODELS } = require("./models.js");
@@ -195,6 +198,13 @@ class OpenRouterLLM {
       let chunk = "";
       let lastChunkTime = null; // null when first token is still not received.
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       // NOTICE: Not all OpenRouter models will return a stop reason
       // which keeps the connection open and so the model never finalizes the stream
       // like the traditional OpenAI response schema does. So in the case the response stream
@@ -220,6 +230,7 @@
           error: false,
         });
         clearInterval(timeoutCheck);
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     }, 500);
@@ -269,6 +280,7 @@
           error: false,
         });
         clearInterval(timeoutCheck);
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       } else {
         let finishReason = null;
@@ -305,6 +317,7 @@
           error: false,
         });
         clearInterval(timeoutCheck);
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     }

View File

@@ -1,5 +1,8 @@
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 function togetherAiModels() {
   const { MODELS } = require("./models.js");
@@ -185,6 +188,14 @@ class TogetherAiLLM {
     return new Promise((resolve) => {
       let fullText = "";
       let chunk = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       stream.data.on("data", (data) => {
         const lines = data
           ?.toString()
@@ -230,6 +241,7 @@
             close: true,
             error: false,
           });
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         } else {
           let finishReason = null;
@@ -263,6 +275,7 @@
            close: true,
            error: false,
          });
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         }
       }

View File

@@ -1,6 +1,14 @@
 const { v4: uuidv4 } = require("uuid");
 const moment = require("moment");

+function clientAbortedHandler(resolve, fullText) {
+  console.log(
+    "\x1b[43m\x1b[34m[STREAM ABORTED]\x1b[0m Client requested to abort stream. Exiting LLM stream handler early."
+  );
+  resolve(fullText);
+  return;
+}
+
 // The default way to handle a stream response. Functions best with OpenAI.
 // Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
 function handleDefaultStreamResponse(response, stream, responseProps) {
@@ -9,6 +17,14 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
   return new Promise((resolve) => {
     let fullText = "";
     let chunk = "";
+
+    // Establish listener to early-abort a streaming response
+    // in case things go sideways or the user does not like the response.
+    // We preserve the generated text but continue as if chat was completed
+    // to preserve previously generated content.
+    const handleAbort = () => clientAbortedHandler(resolve, fullText);
+    response.on("close", handleAbort);

     stream.data.on("data", (data) => {
       const lines = data
         ?.toString()
@@ -52,6 +68,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       } else {
         let finishReason = null;
@@ -85,6 +102,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     }
@@ -141,4 +159,5 @@ module.exports = {
   convertToChatHistory,
   convertToPromptHistory,
   writeResponseChunk,
+  clientAbortedHandler,
 };
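
Every provider above follows the same server-side pattern; a condensed sketch of it (illustrative only: the token/end events are stand-ins, since each provider iterates its stream differently, and the chunk type is assumed):

// Illustrative shape of an LLM handleStreamResponse after this commit.
// `response` is the SSE response object, `stream` a hypothetical token emitter.
function handleStreamResponseSketch(response, stream, { uuid, sources = [] }) {
  return new Promise((resolve) => {
    let fullText = "";

    // If the client disconnects (e.g. after ABORT_STREAM_EVENT), resolve
    // early with whatever text was already generated.
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    stream.on("token", (token) => {
      fullText += token;
      writeResponseChunk(response, {
        uuid,
        sources,
        type: "textResponseChunk",
        textResponse: token,
        close: false,
        error: false,
      });
    });

    stream.on("end", () => {
      writeResponseChunk(response, {
        uuid,
        sources,
        type: "textResponseChunk",
        textResponse: "",
        close: true,
        error: false,
      });
      // Finished normally, so stop listening for the client abort.
      response.removeListener("close", handleAbort);
      resolve(fullText);
    });
  });
}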