Stop generation button during stream-response (#892)

* Stop generation button during stream-response

* add custom stop icon

* add stop to thread chats
Timothy Carambat 2024-03-12 15:21:27 -07:00 committed by GitHub
parent fd2b13017e
commit 0e46a11cb6
16 changed files with 236 additions and 27 deletions
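Taken together, the changes wire one browser event from the new stop button through the frontend stream models and into every LLM provider's stream handler on the server. A rough sketch of the flow, assembled from the diffs below (illustrative only, not a literal excerpt from any one file):

// 1. Frontend: StopGenerationButton dispatches the shared event.
window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));

// 2. Workspace / WorkspaceThread stream models: abort the in-flight request and
//    push a synthetic "stopGeneration" chat so handleChat can reset the UI.
window.addEventListener(ABORT_STREAM_EVENT, () => {
  ctrl.abort(); // the AbortController handed to fetchEventSource
  handleChat({ id: v4(), type: "stopGeneration" });
});

// 3. Server: each provider watches for the HTTP response closing early and
//    resolves its stream promise with whatever text was generated so far.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);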

View File

@ -0,0 +1,50 @@
import { ABORT_STREAM_EVENT } from "@/utils/chat";
import { Tooltip } from "react-tooltip";
export default function StopGenerationButton() {
function emitHaltEvent() {
window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
}
return (
<>
<button
type="button"
onClick={emitHaltEvent}
data-tooltip-id="stop-generation-button"
data-tooltip-content="Stop generating response"
className="border-none text-white/60 cursor-pointer group"
>
<svg
width="28"
height="28"
viewBox="0 0 28 28"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<circle
className="group-hover:stroke-[#46C8FF] stroke-white"
cx="10"
cy="10.562"
r="9"
strokeWidth="2"
/>
<rect
className="group-hover:fill-[#46C8FF] fill-white"
x="6.3999"
y="6.96204"
width="7.2"
height="7.2"
rx="2"
/>
</svg>
</button>
<Tooltip
id="stop-generation-button"
place="bottom"
delayShow={300}
className="tooltip !text-xs invert"
/>
</>
);
}
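While a response is streaming, this component simply takes the submit button's place. A minimal sketch of the conditional render, assuming the `buttonDisabled` flag used by PromptInput (the full change appears in the PromptInput diff further down):

{buttonDisabled ? (
  <StopGenerationButton />
) : (
  <button type="submit" className="...">
    <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
  </button>
)}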

View File

@ -0,0 +1,4 @@
<svg width="21" height="21" viewBox="0 0 21 21" fill="none" xmlns="http://www.w3.org/2000/svg">
<circle cx="10.8984" cy="10.562" r="9" stroke="white" stroke-width="2"/>
<rect x="7.29846" y="6.96204" width="7.2" height="7.2" rx="2" fill="white"/>
</svg>


View File

@ -1,4 +1,3 @@
- import { CircleNotch, PaperPlaneRight } from "@phosphor-icons/react";
import React, { useState, useRef } from "react";
import SlashCommandsButton, {
SlashCommands,
@ -6,6 +5,8 @@ import SlashCommandsButton, {
} from "./SlashCommands";
import { isMobile } from "react-device-detect";
import debounce from "lodash.debounce";
+ import { PaperPlaneRight } from "@phosphor-icons/react";
+ import StopGenerationButton from "./StopGenerationButton";
export default function PromptInput({
workspace,
@ -83,19 +84,18 @@ export default function PromptInput({
className="cursor-text max-h-[100px] md:min-h-[40px] mx-2 md:mx-0 py-2 w-full text-[16px] md:text-md text-white bg-transparent placeholder:text-white/60 resize-none active:outline-none focus:outline-none flex-grow"
placeholder={"Send a message"}
/>
- <button
-   ref={formRef}
-   type="submit"
-   disabled={buttonDisabled}
-   className="inline-flex justify-center rounded-2xl cursor-pointer text-white/60 hover:text-white group ml-4"
- >
-   {buttonDisabled ? (
-     <CircleNotch className="w-6 h-6 animate-spin" />
-   ) : (
-     <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
-   )}
-   <span className="sr-only">Send message</span>
- </button>
+ {buttonDisabled ? (
+   <StopGenerationButton />
+ ) : (
+   <button
+     ref={formRef}
+     type="submit"
+     className="inline-flex justify-center rounded-2xl cursor-pointer text-white/60 hover:text-white group ml-4"
+   >
+     <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
+     <span className="sr-only">Send message</span>
+   </button>
+ )}
</div>
<div className="flex justify-between py-3.5">
<div className="flex gap-x-2">

View File

@ -68,11 +68,7 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
const remHistory = chatHistory.length > 0 ? chatHistory.slice(0, -1) : [];
var _chatHistory = [...remHistory];
- if (!promptMessage || !promptMessage?.userMessage) {
-   setLoadingResponse(false);
-   return false;
- }
+ if (!promptMessage || !promptMessage?.userMessage) return false;
if (!!threadSlug) {
await Workspace.threads.streamChat(
{ workspaceSlug: workspace.slug, threadSlug },

View File

@ -3,6 +3,7 @@ import { baseHeaders } from "@/utils/request";
import { fetchEventSource } from "@microsoft/fetch-event-source";
import WorkspaceThread from "@/models/workspaceThread";
import { v4 } from "uuid";
import { ABORT_STREAM_EVENT } from "@/utils/chat";
const Workspace = {
new: async function (data = {}) {
@ -75,6 +76,16 @@ const Workspace = {
},
streamChat: async function ({ slug }, message, handleChat) {
const ctrl = new AbortController();
// Listen for the ABORT_STREAM_EVENT emitted by the client to abort the
// streaming response early. On abort we send a special `stopGeneration`
// event that resets the UI so another message can be sent.
// The backend abort handling is done in each LLM's handleStreamResponse.
window.addEventListener(ABORT_STREAM_EVENT, () => {
ctrl.abort();
handleChat({ id: v4(), type: "stopGeneration" });
});
await fetchEventSource(`${API_BASE}/workspace/${slug}/stream-chat`, {
method: "POST",
body: JSON.stringify({ message }),
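A design note on the listener above: it is added to `window` on every `streamChat` call and never removed, so handlers accumulate across messages (the same applies to the thread variant below). If that ever becomes a concern, a hypothetical self-cleaning variant, not part of this commit, could look like:

const handleAbort = () => {
  ctrl.abort();
  handleChat({ id: v4(), type: "stopGeneration" });
};
// Fire at most once, and drop the handler once the stream settles.
window.addEventListener(ABORT_STREAM_EVENT, handleAbort, { once: true });
// ...after fetchEventSource resolves or throws:
window.removeEventListener(ABORT_STREAM_EVENT, handleAbort);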

View File

@ -1,3 +1,4 @@
import { ABORT_STREAM_EVENT } from "@/utils/chat";
import { API_BASE } from "@/utils/constants";
import { baseHeaders } from "@/utils/request";
import { fetchEventSource } from "@microsoft/fetch-event-source";
@ -80,6 +81,16 @@ const WorkspaceThread = {
handleChat
) {
const ctrl = new AbortController();
// Listen for the ABORT_STREAM_EVENT emitted by the client to abort the
// streaming response early. On abort we send a special `stopGeneration`
// event that resets the UI so another message can be sent.
// The backend abort handling is done in each LLM's handleStreamResponse.
window.addEventListener(ABORT_STREAM_EVENT, () => {
ctrl.abort();
handleChat({ id: v4(), type: "stopGeneration" });
});
await fetchEventSource(
`${API_BASE}/workspace/${workspaceSlug}/thread/${threadSlug}/stream-chat`,
{

View File

@ -1,3 +1,5 @@
export const ABORT_STREAM_EVENT = "abort-chat-stream";
// For handling of chat responses in the frontend by their various types.
export default function handleChat(
chatResult,
@ -108,6 +110,22 @@ export default function handleChat(
_chatHistory[chatIdx] = updatedHistory;
}
setChatHistory([..._chatHistory]);
setLoadingResponse(false);
} else if (type === "stopGeneration") {
const chatIdx = _chatHistory.length - 1;
const existingHistory = { ..._chatHistory[chatIdx] };
const updatedHistory = {
...existingHistory,
sources: [],
closed: true,
error: null,
animate: false,
pending: false,
};
_chatHistory[chatIdx] = updatedHistory;
setChatHistory([..._chatHistory]);
setLoadingResponse(false);
}
}
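For orientation, the `stopGeneration` branch above is only ever driven by the synthetic chat emitted from the stream models; a small illustration of the input and its effect, with `lastEntry` as a stand-in name for the trailing history item (not a literal excerpt):

// Pushed by Workspace.streamChat / WorkspaceThread.streamChat on abort:
const stopChat = { id: v4(), type: "stopGeneration" };

// handleChat(stopChat, ...) then freezes the trailing history entry so the UI
// stops animating and the prompt can be used again:
const frozen = {
  ...lastEntry,
  sources: [],
  closed: true,
  error: null,
  animate: false,
  pending: false,
};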

View File

@ -1,6 +1,9 @@
const { v4 } = require("uuid");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class AnthropicLLM {
constructor(embedder = null, modelPreference = null) {
if (!process.env.ANTHROPIC_API_KEY)
@ -150,6 +153,13 @@ class AnthropicLLM {
let fullText = "";
const { uuid = v4(), sources = [] } = responseProps;
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.on("streamEvent", (message) => {
const data = message;
if (
@ -181,6 +191,7 @@ class AnthropicLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
});

View File

@ -1,6 +1,9 @@
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class AzureOpenAiLLM {
constructor(embedder = null, _modelPreference = null) {
@ -174,6 +177,14 @@ class AzureOpenAiLLM {
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const event of stream) {
for (const choice of event.choices) {
const delta = choice.delta?.content;
@ -198,6 +209,7 @@ class AzureOpenAiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
});
}

View File

@ -1,5 +1,8 @@
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class GeminiLLM {
constructor(embedder = null, modelPreference = null) {
@ -198,6 +201,14 @@ class GeminiLLM {
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
fullText += chunk.text();
writeResponseChunk(response, {
@ -218,6 +229,7 @@ class GeminiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
});
}

View File

@ -1,7 +1,10 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
class HuggingFaceLLM {
constructor(embedder = null, _modelPreference = null) {
@ -172,6 +175,14 @@ class HuggingFaceLLM {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
@ -218,6 +229,7 @@ class HuggingFaceLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let error = null;
@ -241,6 +253,7 @@ class HuggingFaceLLM {
close: true,
error,
});
response.removeListener("close", handleAbort);
resolve("");
return;
}
@ -266,6 +279,7 @@ class HuggingFaceLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}

View File

@ -2,7 +2,10 @@ const fs = require("fs");
const path = require("path");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
// Docs: https://api.js.langchain.com/classes/chat_models_llama_cpp.ChatLlamaCpp.html
const ChatLlamaCpp = (...args) =>
@ -176,6 +179,14 @@ class NativeLLM {
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
if (chunk === undefined)
throw new Error(
@ -202,6 +213,7 @@ class NativeLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
});
}

View File

@ -1,6 +1,9 @@
const { chatPrompt } = require("../../chats");
const { StringOutputParser } = require("langchain/schema/output_parser");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
class OllamaAILLM {
@ -180,8 +183,16 @@ class OllamaAILLM {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise(async (resolve) => {
let fullText = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
try {
for await (const chunk of stream) {
if (chunk === undefined)
throw new Error(
@ -210,6 +221,7 @@ class OllamaAILLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} catch (error) {
writeResponseChunk(response, {
@ -222,6 +234,7 @@ class OllamaAILLM {
error?.cause ?? error.message
}`,
});
response.removeListener("close", handleAbort);
}
});
}

View File

@ -1,7 +1,10 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { v4: uuidv4 } = require("uuid");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
function openRouterModels() {
const { MODELS } = require("./models.js");
@ -195,6 +198,13 @@ class OpenRouterLLM {
let chunk = "";
let lastChunkTime = null; // null when first token is still not received.
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
// NOTICE: Not all OpenRouter models will return a stop reason
// which keeps the connection open and so the model never finalizes the stream
// like the traditional OpenAI response schema does. So in the case the response stream
@ -220,6 +230,7 @@ class OpenRouterLLM {
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}, 500);
@ -269,6 +280,7 @@ class OpenRouterLLM {
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
@ -305,6 +317,7 @@ class OpenRouterLLM {
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}

View File

@ -1,5 +1,8 @@
const { chatPrompt } = require("../../chats");
- const { writeResponseChunk } = require("../../helpers/chat/responses");
+ const {
+   writeResponseChunk,
+   clientAbortedHandler,
+ } = require("../../helpers/chat/responses");
function togetherAiModels() {
const { MODELS } = require("./models.js");
@ -185,6 +188,14 @@ class TogetherAiLLM {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
@ -230,6 +241,7 @@ class TogetherAiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
@ -263,6 +275,7 @@ class TogetherAiLLM {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}

View File

@ -1,6 +1,14 @@
const { v4: uuidv4 } = require("uuid");
const moment = require("moment");
function clientAbortedHandler(resolve, fullText) {
console.log(
"\x1b[43m\x1b[34m[STREAM ABORTED]\x1b[0m Client requested to abort stream. Exiting LLM stream handler early."
);
resolve(fullText);
return;
}
// The default way to handle a stream response. Functions best with OpenAI.
// Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
function handleDefaultStreamResponse(response, stream, responseProps) {
@ -9,6 +17,14 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish a listener to abort a streaming response early if things go
// sideways or the user does not like the response. We keep whatever text was
// already generated and resolve as if the chat had completed normally.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
@ -52,6 +68,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
@ -85,6 +102,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
@ -141,4 +159,5 @@ module.exports = {
convertToChatHistory,
convertToPromptHistory,
writeResponseChunk,
clientAbortedHandler,
};
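Zooming out, every provider touched by this commit wraps its stream loop in the same abort lifecycle around `clientAbortedHandler`; condensed, the pattern looks like this (illustrative sketch with a generic function name, not a literal excerpt from any one provider):

function handleStream(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;
  return new Promise((resolve) => {
    let fullText = "";
    // If the client disconnects mid-stream, resolve with whatever we have so far.
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    // ...consume the provider's `stream` here, appending to fullText and calling
    // writeResponseChunk(response, { uuid, sources, ... }) for each chunk...

    // On a normal finish, stop watching for the abort and resolve.
    response.removeListener("close", handleAbort);
    resolve(fullText);
  });
}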