Patch handling of end chunk stream events for OpenAI endpoints (#1487)

* Patch handling of end chunk stream events for OpenAI endpoints

* update LiteLLM to use generic handler

* update for empty choices
Timothy Carambat 2024-05-23 12:20:40 -05:00 committed by GitHub
parent 13fb63930b
commit 2f9b785f42
2 changed files with 10 additions and 43 deletions
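For context: OpenAI-compatible endpoints stream a sequence of chunks in which each `choices[0].delta` may carry a token, and the end of the stream is signaled by a chunk whose message has a non-empty, non-null `finish_reason`. Some backends (LocalAI, per the comment in the diff below) send `""` on intermediate chunks while others send `null`, and some send chunks with an empty `choices` array. A minimal sketch of the end-chunk test this commit converges on; `isEndChunk` is an illustrative name, not a helper in the codebase:

// Minimal sketch of the end-chunk detection added in this commit.
// `isEndChunk` is hypothetical; the real check lives inline in
// handleDefaultStreamResponseV2.
function isEndChunk(chunk) {
  const message = chunk?.choices?.[0]; // may be undefined when choices is empty
  return (
    message?.hasOwnProperty("finish_reason") && // key must exist on the message
    message.finish_reason !== "" && // LocalAI sends "" until the final chunk
    message.finish_reason !== null // other backends send null until the end
  );
}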

server/utils/AiProviders/liteLLM/index.js

@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
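The LiteLLM provider previously carried a bespoke copy of this streaming loop, removed above. Its end-of-stream test, `message.finish_reason || message.finish_reason === "stop"`, had a redundant second clause ("stop" is truthy, so the first clause already covers it) and would throw if a chunk arrived with empty `choices`. Delegating to `handleDefaultStreamResponseV2` removes the duplicate loop while keeping the same abort bookkeeping. Roughly, as a sketch distilled from the removed lines above (token-writing elided):

// Abort pattern the shared handler preserves: resolve with the partial
// text if the client disconnects, and drop the listener on normal finish.
return new Promise(async (resolve) => {
  let fullText = "";
  const handleAbort = () => clientAbortedHandler(resolve, fullText);
  response.on("close", handleAbort);

  for await (const chunk of stream) {
    // ...accumulate tokens into fullText and write them to the client...
  }

  response.removeListener("close", handleAbort); // normal completion
  resolve(fullText);
});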

server/utils/helpers/chat/responses.js

@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
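To see why both the `hasOwnProperty` guard and the `break` matter, walk a toy stream through the old and new checks (chunk shapes below are assumptions modeled on OpenAI-compatible responses, not captured output):

// Toy chunks covering the cases the patched guard must handle.
const chunks = [
  { choices: [{ delta: { content: "Hel" }, finish_reason: null }] }, // token
  { choices: [] }, // empty choices: message is undefined
  { choices: [{ delta: { content: "lo" }, finish_reason: "" }] }, // LocalAI style
  { choices: [{ delta: {}, finish_reason: "stop" }] }, // end chunk
];

for (const chunk of chunks) {
  const message = chunk?.choices?.[0];
  // The old `message.finish_reason !== "" && ...` throws a TypeError on the
  // empty-choices chunk, and a message lacking the key would pass both
  // inequality tests and be misread as an end chunk. The new guard avoids both.
  if (
    message?.hasOwnProperty("finish_reason") &&
    message.finish_reason !== "" &&
    message.finish_reason !== null
  ) {
    console.log("end of stream");
    break; // stop consuming once the end chunk is seen
  }
}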