Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-10 17:00:11 +01:00)
Patch handling of end chunk stream events for OpenAI endpoints (#1487)
* Patch handling of end chunk stream events for OpenAI endpoints
* update LiteLLM to use generic handler
* update for empty choices
parent 13fb63930b
commit 2f9b785f42
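Why this patch exists, as far as the diff tells: OpenAI-compatible backends signal the end of a stream inconsistently, and some emit chunks whose `choices` array is empty. Below are illustrative chunk shapes (our reconstruction, not fixtures from the repo):

// Illustrative chunk shapes from OpenAI-compatible streaming endpoints.
// Mid-stream, OpenAI sets finish_reason to null while LocalAI uses "".
// The ending chunk carries a real value such as "stop", and some endpoints
// also emit chunks with an empty `choices` array.
const openAiMidChunk = { choices: [{ delta: { content: "Hel" }, finish_reason: null }] };
const localAiMidChunk = { choices: [{ delta: { content: "Hel" }, finish_reason: "" }] };
const endingChunk = { choices: [{ delta: {}, finish_reason: "stop" }] };
const emptyChunk = { choices: [] }; // chunk?.choices?.[0] is undefined here

A guard that only compares finish_reason against "" and null treats a chunk with no finish_reason key at all as an ending chunk (undefined passes both comparisons), and reading choices[0].finish_reason from an empty-choices chunk throws a TypeError; the hunks below address both cases.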
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
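For readers without the rest of the file: the generic handler that LiteLLM now delegates to has roughly the shape below, assembled from the removed code above and the patched hunks that follow. It is a sketch, not the verbatim source, and it leans on the repo's writeResponseChunk, clientAbortedHandler, and uuidv4 helpers:

// Sketch of handleDefaultStreamResponseV2, reconstructed from this diff.
function handleDefaultStreamResponseV2(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;

  return new Promise(async (resolve) => {
    let fullText = "";

    // If the client disconnects, resolve with whatever text accumulated so far.
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    for await (const chunk of stream) {
      const message = chunk?.choices?.[0]; // undefined when `choices` is empty
      const token = message?.delta?.content;

      if (token) {
        fullText += token;
        writeResponseChunk(response, {
          uuid,
          sources: [],
          type: "textResponseChunk",
          textResponse: token,
          close: false,
          error: false,
        });
      }

      // Ending chunk: the finish_reason key is present and neither "" nor null.
      if (
        message?.hasOwnProperty("finish_reason") &&
        message.finish_reason !== "" &&
        message.finish_reason !== null
      ) {
        writeResponseChunk(response, {
          uuid,
          sources,
          type: "textResponseChunk",
          textResponse: "",
          close: true,
          error: false,
        });
        response.removeListener("close", handleAbort);
        resolve(fullText);
        break;
      }
    }
  });
}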
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
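To make the new guard concrete, here it is lifted into a standalone predicate with illustrative inputs (the name isEndChunk is ours, not the repo's):

// The patched condition as a standalone predicate; `message` is chunk?.choices?.[0].
const isEndChunk = (message) =>
  message?.hasOwnProperty("finish_reason") &&
  message.finish_reason !== "" &&
  message.finish_reason !== null;

isEndChunk({ delta: {}, finish_reason: "stop" });              // true: ending chunk
isEndChunk({ delta: { content: "hi" }, finish_reason: null }); // false: OpenAI mid-stream
isEndChunk({ delta: { content: "hi" }, finish_reason: "" });   // false: LocalAI mid-stream
isEndChunk({ delta: { content: "hi" } });                      // false: key absent (old guard ended the stream here)
isEndChunk(undefined);                                         // undefined, falsy: empty `choices` (old guard threw here)

The hasOwnProperty check is what distinguishes "finish_reason is present but null-ish" from "finish_reason is missing entirely"; the old guard conflated the two and would close the stream on any chunk that simply lacked the key.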
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
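A subtlety behind the trailing comment: resolving the promise does not stop the for await loop, so without the break any chunks a backend emits after the ending chunk would still be processed (and potentially written to an already-closed response). A self-contained illustration with a made-up stream:

// resolve() alone does not exit a for await loop; `break` does.
async function* fakeStream() {
  yield { done: false };
  yield { done: true };  // "ending chunk"
  yield { done: false }; // visited anyway if `break` is removed below
}

new Promise(async (resolve) => {
  for await (const item of fakeStream()) {
    console.log("visited", item);
    if (item.done) {
      resolve("finished");
      break; // stop consuming the stream once the end is seen
    }
  }
}).then(console.log);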