Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-18 20:20:11 +01:00)
Stop generation button during stream-response (#892)
* Stop generation button during stream-response
* add custom stop icon
* add stop to thread chats
parent fd2b13017e
commit 0e46a11cb6
@ -0,0 +1,50 @@
+import { ABORT_STREAM_EVENT } from "@/utils/chat";
+import { Tooltip } from "react-tooltip";
+
+export default function StopGenerationButton() {
+  function emitHaltEvent() {
+    window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
+  }
+
+  return (
+    <>
+      <button
+        type="button"
+        onClick={emitHaltEvent}
+        data-tooltip-id="stop-generation-button"
+        data-tooltip-content="Stop generating response"
+        className="border-none text-white/60 cursor-pointer group"
+      >
+        <svg
+          width="28"
+          height="28"
+          viewBox="0 0 28 28"
+          fill="none"
+          xmlns="http://www.w3.org/2000/svg"
+        >
+          <circle
+            className="group-hover:stroke-[#46C8FF] stroke-white"
+            cx="10"
+            cy="10.562"
+            r="9"
+            stroke-width="2"
+          />
+          <rect
+            className="group-hover:fill-[#46C8FF] fill-white"
+            x="6.3999"
+            y="6.96204"
+            width="7.2"
+            height="7.2"
+            rx="2"
+          />
+        </svg>
+      </button>
+      <Tooltip
+        id="stop-generation-button"
+        place="bottom"
+        delayShow={300}
+        className="tooltip !text-xs invert"
+      />
+    </>
+  );
+}
@ -0,0 +1,4 @@
+<svg width="21" height="21" viewBox="0 0 21 21" fill="none" xmlns="http://www.w3.org/2000/svg">
+<circle cx="10.8984" cy="10.562" r="9" stroke="white" stroke-width="2"/>
+<rect x="7.29846" y="6.96204" width="7.2" height="7.2" rx="2" fill="white"/>
+</svg>
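The new button never touches the streaming code directly; it only broadcasts a window-level CustomEvent that any in-flight request can subscribe to. Below is a minimal sketch of that dispatch/listen pairing, separate from the diff itself; the onAbort callback and the listenForHalt helper are illustrative and not code from this commit.

// Same constant this commit adds to "@/utils/chat".
export const ABORT_STREAM_EVENT = "abort-chat-stream";

// Producer side: what the stop button's click handler boils down to.
export function emitHaltEvent() {
  window.dispatchEvent(new CustomEvent(ABORT_STREAM_EVENT));
}

// Consumer side (illustrative): subscribe before starting a stream, unsubscribe when done.
export function listenForHalt(onAbort) {
  const handler = () => onAbort();
  window.addEventListener(ABORT_STREAM_EVENT, handler);
  return () => window.removeEventListener(ABORT_STREAM_EVENT, handler);
}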
@ -1,4 +1,3 @@
-import { CircleNotch, PaperPlaneRight } from "@phosphor-icons/react";
 import React, { useState, useRef } from "react";
 import SlashCommandsButton, {
   SlashCommands,
@ -6,6 +5,8 @@ import SlashCommandsButton, {
 } from "./SlashCommands";
 import { isMobile } from "react-device-detect";
 import debounce from "lodash.debounce";
+import { PaperPlaneRight } from "@phosphor-icons/react";
+import StopGenerationButton from "./StopGenerationButton";

 export default function PromptInput({
   workspace,
@ -83,19 +84,18 @@ export default function PromptInput({
           className="cursor-text max-h-[100px] md:min-h-[40px] mx-2 md:mx-0 py-2 w-full text-[16px] md:text-md text-white bg-transparent placeholder:text-white/60 resize-none active:outline-none focus:outline-none flex-grow"
           placeholder={"Send a message"}
         />
+        {buttonDisabled ? (
+          <StopGenerationButton />
+        ) : (
           <button
             ref={formRef}
             type="submit"
-            disabled={buttonDisabled}
             className="inline-flex justify-center rounded-2xl cursor-pointer text-white/60 hover:text-white group ml-4"
           >
-            {buttonDisabled ? (
-              <CircleNotch className="w-6 h-6 animate-spin" />
-            ) : (
             <PaperPlaneRight className="w-7 h-7 my-3" weight="fill" />
-            )}
             <span className="sr-only">Send message</span>
           </button>
+        )}
       </div>
       <div className="flex justify-between py-3.5">
         <div className="flex gap-x-2">
@ -68,11 +68,7 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
     const remHistory = chatHistory.length > 0 ? chatHistory.slice(0, -1) : [];
     var _chatHistory = [...remHistory];
-
-    if (!promptMessage || !promptMessage?.userMessage) {
-      setLoadingResponse(false);
-      return false;
-    }
+    if (!promptMessage || !promptMessage?.userMessage) return false;

     if (!!threadSlug) {
       await Workspace.threads.streamChat(
         { workspaceSlug: workspace.slug, threadSlug },
@ -3,6 +3,7 @@ import { baseHeaders } from "@/utils/request";
 import { fetchEventSource } from "@microsoft/fetch-event-source";
 import WorkspaceThread from "@/models/workspaceThread";
 import { v4 } from "uuid";
+import { ABORT_STREAM_EVENT } from "@/utils/chat";

 const Workspace = {
   new: async function (data = {}) {
@ -75,6 +76,16 @@ const Workspace = {
   },
   streamChat: async function ({ slug }, message, handleChat) {
     const ctrl = new AbortController();
+
+    // Listen for the ABORT_STREAM_EVENT key to be emitted by the client
+    // to early abort the streaming response. On abort we send a special `stopGeneration`
+    // event to be handled which resets the UI for us to be able to send another message.
+    // The backend response abort handling is done in each LLM's handleStreamResponse.
+    window.addEventListener(ABORT_STREAM_EVENT, () => {
+      ctrl.abort();
+      handleChat({ id: v4(), type: "stopGeneration" });
+    });
+
     await fetchEventSource(`${API_BASE}/workspace/${slug}/stream-chat`, {
       method: "POST",
       body: JSON.stringify({ message }),
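Calling ctrl.abort() only cancels the network request because that controller's signal is handed to fetchEventSource; the signal wiring sits outside this hunk, so the sketch below assumes it. A minimal illustration of the pairing, with a placeholder workspace slug and message:

import { API_BASE } from "@/utils/constants";
import { fetchEventSource } from "@microsoft/fetch-event-source";

const ctrl = new AbortController();

fetchEventSource(`${API_BASE}/workspace/my-workspace/stream-chat`, {
  method: "POST",
  body: JSON.stringify({ message: "Hello" }),
  signal: ctrl.signal, // assumed wiring: abort() below tears down this SSE request
  onmessage(msg) {
    console.log("chunk", msg.data);
  },
});

// Later, when ABORT_STREAM_EVENT fires:
ctrl.abort(); // the server's response "close" event fires and generation stops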
@ -1,3 +1,4 @@
+import { ABORT_STREAM_EVENT } from "@/utils/chat";
 import { API_BASE } from "@/utils/constants";
 import { baseHeaders } from "@/utils/request";
 import { fetchEventSource } from "@microsoft/fetch-event-source";
@ -80,6 +81,16 @@ const WorkspaceThread = {
     handleChat
   ) {
     const ctrl = new AbortController();
+
+    // Listen for the ABORT_STREAM_EVENT key to be emitted by the client
+    // to early abort the streaming response. On abort we send a special `stopGeneration`
+    // event to be handled which resets the UI for us to be able to send another message.
+    // The backend response abort handling is done in each LLM's handleStreamResponse.
+    window.addEventListener(ABORT_STREAM_EVENT, () => {
+      ctrl.abort();
+      handleChat({ id: v4(), type: "stopGeneration" });
+    });
+
     await fetchEventSource(
       `${API_BASE}/workspace/${workspaceSlug}/thread/${threadSlug}/stream-chat`,
       {
@ -1,3 +1,5 @@
+export const ABORT_STREAM_EVENT = "abort-chat-stream";
+
 // For handling of chat responses in the frontend by their various types.
 export default function handleChat(
   chatResult,
@ -108,6 +110,22 @@ export default function handleChat(
       _chatHistory[chatIdx] = updatedHistory;
     }
     setChatHistory([..._chatHistory]);
+    setLoadingResponse(false);
+  } else if (type === "stopGeneration") {
+    const chatIdx = _chatHistory.length - 1;
+    const existingHistory = { ..._chatHistory[chatIdx] };
+    const updatedHistory = {
+      ...existingHistory,
+      sources: [],
+      closed: true,
+      error: null,
+      animate: false,
+      pending: false,
+    };
+    _chatHistory[chatIdx] = updatedHistory;
+
+    setChatHistory([..._chatHistory]);
+    setLoadingResponse(false);
   }
 }
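The new stopGeneration branch is a pure state transition on the last chat history entry: keep whatever text has streamed in, but mark the message closed and clear the loading state. A standalone sketch of that transformation, written as a plain helper for illustration rather than the diff's handleChat:

function finalizeLastMessage(chatHistory) {
  if (chatHistory.length === 0) return chatHistory;
  const chatIdx = chatHistory.length - 1;
  const updatedHistory = {
    ...chatHistory[chatIdx],
    sources: [],
    closed: true,    // stop rendering the streaming cursor
    error: null,
    animate: false,  // stop the typing animation
    pending: false,  // no further chunks are expected
  };
  return [...chatHistory.slice(0, chatIdx), updatedHistory];
}

// A message interrupted mid-stream keeps its partial text but is treated as finished:
console.log(finalizeLastMessage([{ content: "Partial ans", animate: true, pending: true }]));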
@ -1,6 +1,9 @@
 const { v4 } = require("uuid");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");
 class AnthropicLLM {
   constructor(embedder = null, modelPreference = null) {
     if (!process.env.ANTHROPIC_API_KEY)
@ -150,6 +153,13 @@ class AnthropicLLM {
       let fullText = "";
       const { uuid = v4(), sources = [] } = responseProps;

+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
       stream.on("streamEvent", (message) => {
         const data = message;
         if (
@ -181,6 +191,7 @@ class AnthropicLLM {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     });
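Every server-side provider below repeats the same guard introduced here: register an abort handler on the Express response's "close" event before consuming the SDK stream, then detach it once the stream finishes normally so a later client disconnect is not treated as an abort. A condensed sketch of the shared pattern, written as if it lived alongside the providers; streamChunks stands in for whichever SDK iterator a provider uses, and this is not any single provider from the diff:

const {
  writeResponseChunk,
  clientAbortedHandler,
} = require("../../helpers/chat/responses");

// `response` is the Express response; `streamChunks` is any async iterable of text chunks.
function handleStreamSketch(response, streamChunks, { uuid, sources = [] }) {
  return new Promise(async (resolve) => {
    let fullText = "";

    // Client hung up (or pressed stop): resolve with whatever was generated so far.
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    for await (const text of streamChunks) {
      fullText += text;
      writeResponseChunk(response, {
        uuid,
        sources,
        type: "textResponseChunk",
        textResponse: text,
        close: false,
        error: false,
      });
    }

    writeResponseChunk(response, {
      uuid,
      sources,
      type: "textResponseChunk",
      textResponse: "",
      close: true,
      error: false,
    });
    // Finished normally: detach the abort handler so a subsequent disconnect is ignored.
    response.removeListener("close", handleAbort);
    resolve(fullText);
  });
}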
@ -1,6 +1,9 @@
 const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class AzureOpenAiLLM {
   constructor(embedder = null, _modelPreference = null) {
@ -174,6 +177,14 @@ class AzureOpenAiLLM {

     return new Promise(async (resolve) => {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
       for await (const event of stream) {
         for (const choice of event.choices) {
           const delta = choice.delta?.content;
@ -198,6 +209,7 @@ class AzureOpenAiLLM {
         close: true,
         error: false,
       });
+      response.removeListener("close", handleAbort);
       resolve(fullText);
     });
   }
@ -1,5 +1,8 @@
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class GeminiLLM {
   constructor(embedder = null, modelPreference = null) {
@ -198,6 +201,14 @@ class GeminiLLM {

     return new Promise(async (resolve) => {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
       for await (const chunk of stream) {
         fullText += chunk.text();
         writeResponseChunk(response, {
@ -218,6 +229,7 @@ class GeminiLLM {
         close: true,
         error: false,
       });
+      response.removeListener("close", handleAbort);
       resolve(fullText);
     });
   }
@ -1,7 +1,10 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 class HuggingFaceLLM {
   constructor(embedder = null, _modelPreference = null) {
@ -172,6 +175,14 @@ class HuggingFaceLLM {
     return new Promise((resolve) => {
       let fullText = "";
       let chunk = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
       stream.data.on("data", (data) => {
         const lines = data
           ?.toString()
@ -218,6 +229,7 @@ class HuggingFaceLLM {
             close: true,
             error: false,
           });
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         } else {
           let error = null;
@ -241,6 +253,7 @@ class HuggingFaceLLM {
             close: true,
             error,
           });
+          response.removeListener("close", handleAbort);
           resolve("");
           return;
         }
@ -266,6 +279,7 @@ class HuggingFaceLLM {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     }
@ -2,7 +2,10 @@ const fs = require("fs");
 const path = require("path");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 // Docs: https://api.js.langchain.com/classes/chat_models_llama_cpp.ChatLlamaCpp.html
 const ChatLlamaCpp = (...args) =>
@ -176,6 +179,14 @@ class NativeLLM {

     return new Promise(async (resolve) => {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
       for await (const chunk of stream) {
         if (chunk === undefined)
           throw new Error(
@ -202,6 +213,7 @@ class NativeLLM {
         close: true,
         error: false,
       });
+      response.removeListener("close", handleAbort);
       resolve(fullText);
     });
   }
@ -1,6 +1,9 @@
 const { chatPrompt } = require("../../chats");
 const { StringOutputParser } = require("langchain/schema/output_parser");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 class OllamaAILLM {
@ -180,8 +183,16 @@ class OllamaAILLM {
     const { uuid = uuidv4(), sources = [] } = responseProps;

     return new Promise(async (resolve) => {
-      try {
       let fullText = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
+      try {
         for await (const chunk of stream) {
           if (chunk === undefined)
             throw new Error(
@ -210,6 +221,7 @@ class OllamaAILLM {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       } catch (error) {
         writeResponseChunk(response, {
@ -222,6 +234,7 @@ class OllamaAILLM {
             error?.cause ?? error.message
           }`,
         });
+        response.removeListener("close", handleAbort);
       }
     });
   }
@ -1,7 +1,10 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const { chatPrompt } = require("../../chats");
 const { v4: uuidv4 } = require("uuid");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 function openRouterModels() {
   const { MODELS } = require("./models.js");
@ -195,6 +198,13 @@ class OpenRouterLLM {
       let chunk = "";
       let lastChunkTime = null; // null when first token is still not received.
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);

       // NOTICE: Not all OpenRouter models will return a stop reason
       // which keeps the connection open and so the model never finalizes the stream
       // like the traditional OpenAI response schema does. So in the case the response stream
@ -220,6 +230,7 @@ class OpenRouterLLM {
             error: false,
           });
           clearInterval(timeoutCheck);
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         }
       }, 500);
@ -269,6 +280,7 @@ class OpenRouterLLM {
             error: false,
           });
           clearInterval(timeoutCheck);
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         } else {
           let finishReason = null;
@ -305,6 +317,7 @@ class OpenRouterLLM {
             error: false,
           });
           clearInterval(timeoutCheck);
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         }
       }
@ -1,5 +1,8 @@
 const { chatPrompt } = require("../../chats");
-const { writeResponseChunk } = require("../../helpers/chat/responses");
+const {
+  writeResponseChunk,
+  clientAbortedHandler,
+} = require("../../helpers/chat/responses");

 function togetherAiModels() {
   const { MODELS } = require("./models.js");
@ -185,6 +188,14 @@ class TogetherAiLLM {
     return new Promise((resolve) => {
       let fullText = "";
       let chunk = "";
+
+      // Establish listener to early-abort a streaming response
+      // in case things go sideways or the user does not like the response.
+      // We preserve the generated text but continue as if chat was completed
+      // to preserve previously generated content.
+      const handleAbort = () => clientAbortedHandler(resolve, fullText);
+      response.on("close", handleAbort);
+
       stream.data.on("data", (data) => {
         const lines = data
           ?.toString()
@ -230,6 +241,7 @@ class TogetherAiLLM {
             close: true,
             error: false,
           });
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         } else {
           let finishReason = null;
@ -263,6 +275,7 @@ class TogetherAiLLM {
             close: true,
             error: false,
           });
+          response.removeListener("close", handleAbort);
           resolve(fullText);
         }
       }
@ -1,6 +1,14 @@
 const { v4: uuidv4 } = require("uuid");
 const moment = require("moment");

+function clientAbortedHandler(resolve, fullText) {
+  console.log(
+    "\x1b[43m\x1b[34m[STREAM ABORTED]\x1b[0m Client requested to abort stream. Exiting LLM stream handler early."
+  );
+  resolve(fullText);
+  return;
+}
+
 // The default way to handle a stream response. Functions best with OpenAI.
 // Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
 function handleDefaultStreamResponse(response, stream, responseProps) {
@ -9,6 +17,14 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
   return new Promise((resolve) => {
     let fullText = "";
     let chunk = "";
+
+    // Establish listener to early-abort a streaming response
+    // in case things go sideways or the user does not like the response.
+    // We preserve the generated text but continue as if chat was completed
+    // to preserve previously generated content.
+    const handleAbort = () => clientAbortedHandler(resolve, fullText);
+    response.on("close", handleAbort);
+
     stream.data.on("data", (data) => {
       const lines = data
         ?.toString()
@ -52,6 +68,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       } else {
         let finishReason = null;
@ -85,6 +102,7 @@ function handleDefaultStreamResponse(response, stream, responseProps) {
           close: true,
           error: false,
         });
+        response.removeListener("close", handleAbort);
         resolve(fullText);
       }
     }
@ -141,4 +159,5 @@ module.exports = {
   convertToChatHistory,
   convertToPromptHistory,
   writeResponseChunk,
+  clientAbortedHandler,
 };
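Because clientAbortedHandler resolves the provider's promise with the text accumulated so far instead of rejecting it, an abort looks like an ordinary completion to the calling chat endpoint, and the partial answer can be persisted like any finished reply. A hedged caller-side sketch; the handleStreamResponse method name follows the diff's own comment and the saveChat callback is purely illustrative:

// Illustrative only: a caller that treats an aborted stream like a short, normal reply.
async function streamAndPersist(LLMConnector, response, stream, uuid, sources, saveChat) {
  const completeText = await LLMConnector.handleStreamResponse(response, stream, {
    uuid,
    sources,
  });
  // On user abort, completeText holds the partial output resolved by clientAbortedHandler,
  // so it is written to chat history the same way a full response would be.
  await saveChat(completeText);
  return completeText;
}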