From 2f9b785f424ba538bb37fc8de3842a74b650d8e1 Mon Sep 17 00:00:00 2001
From: Timothy Carambat
Date: Thu, 23 May 2024 12:20:40 -0500
Subject: [PATCH] Patch handling of end chunk stream events for OpenAI
 endpoints (#1487)

* Patch handling of end chunk stream events for OpenAI endpoints
* update LiteLLM to use generic handler
* update for empty choices
---
 server/utils/AiProviders/liteLLM/index.js | 43 ++---------------------
 server/utils/helpers/chat/responses.js    | 10 ++++--
 2 files changed, 10 insertions(+), 43 deletions(-)

diff --git a/server/utils/AiProviders/liteLLM/index.js b/server/utils/AiProviders/liteLLM/index.js
index 2c7fa823..28d0b71d 100644
--- a/server/utils/AiProviders/liteLLM/index.js
+++ b/server/utils/AiProviders/liteLLM/index.js
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
diff --git a/server/utils/helpers/chat/responses.js b/server/utils/helpers/chat/responses.js
index d49c8a85..d07eae30 100644
--- a/server/utils/helpers/chat/responses.js
+++ b/server/utils/helpers/chat/responses.js
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
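
Additional context on the new guard (a minimal sketch; the sample chunks
below are assumptions about typical OpenAI-compatible stream shapes, not
captured output from any specific backend):

    // Chunk shapes the patched condition in handleDefaultStreamResponseV2
    // has to tolerate. `chunks` is hypothetical sample data.
    const chunks = [
      { choices: [{ delta: { content: "Hel" }, finish_reason: null }] }, // mid-stream, most providers
      { choices: [{ delta: { content: "lo" }, finish_reason: "" }] }, // mid-stream, LocalAI-style empty string
      { choices: [] }, // empty choices, e.g. a trailing usage-only chunk
      { choices: [{ delta: {}, finish_reason: "stop" }] }, // terminal chunk
    ];

    for (const chunk of chunks) {
      const message = chunk?.choices?.[0];
      const isFinal =
        message?.hasOwnProperty("finish_reason") && // undefined message (empty choices) can never close the stream
        message.finish_reason !== "" &&
        message.finish_reason !== null;
      console.log(Boolean(isFinal)); // false, false, false, true
    }

Because the loop now breaks on the first chunk that satisfies this check,
any chunks a provider emits after the terminal one can no longer write to
the already-closed response or call resolve() a second time.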
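
The LiteLLM change above is the pattern any other OpenAI-compatible provider
under server/utils/AiProviders can follow. The class below is a made-up
example to show the delegation, not a file touched by this patch:

    const {
      handleDefaultStreamResponseV2,
    } = require("../../helpers/chat/responses");

    class GenericOpenAiLikeLLM {
      // ...constructor, streamGetChatCompletion(), etc. omitted...

      // Delegate SSE fan-out, client-abort handling, and finish_reason
      // detection to the shared helper instead of reimplementing the
      // streaming loop in every provider.
      handleStream(response, stream, responseProps) {
        return handleDefaultStreamResponseV2(response, stream, responseProps);
      }
    }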