Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-10 17:00:11 +01:00)
Patch handling of end chunk stream events for OpenAI endpoints (#1487)
* Patch handling of end chunk stream events for OpenAI endpoints
* update LiteLLM to use generic handler
* update for empty choices
parent 13fb63930b
commit 2f9b785f42
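Why this patch exists, as far as the diff tells: OpenAI-compatible backends signal the end of a stream inconsistently, and some emit chunks whose `choices` array is empty. Below are illustrative chunk shapes (our reconstruction, not fixtures from the repo):

// Illustrative chunk shapes from OpenAI-compatible streaming endpoints.
// Mid-stream, OpenAI sets finish_reason to null while LocalAI uses "".
// The ending chunk carries a real value such as "stop", and some endpoints
// also emit chunks with an empty `choices` array.
const openAiMidChunk = { choices: [{ delta: { content: "Hel" }, finish_reason: null }] };
const localAiMidChunk = { choices: [{ delta: { content: "Hel" }, finish_reason: "" }] };
const endingChunk = { choices: [{ delta: {}, finish_reason: "stop" }] };
const emptyChunk = { choices: [] }; // chunk?.choices?.[0] is undefined here

A guard that only compares finish_reason against "" and null treats a chunk with no finish_reason key at all as an ending chunk (undefined passes both comparisons), and reading choices[0].finish_reason from an empty-choices chunk throws a TypeError; the hunks below address both cases.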
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
 
 class LiteLLM {
@@ -113,45 +112,7 @@ class LiteLLM {
   }
 
   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }
 
   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
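For readers without the rest of the file: the generic handler that LiteLLM now delegates to has roughly the shape below, assembled from the removed code above and the patched hunks that follow. It is a sketch, not the verbatim source, and it leans on the repo's writeResponseChunk, clientAbortedHandler, and uuidv4 helpers:

// Sketch of handleDefaultStreamResponseV2, reconstructed from this diff.
function handleDefaultStreamResponseV2(response, stream, responseProps) {
  const { uuid = uuidv4(), sources = [] } = responseProps;

  return new Promise(async (resolve) => {
    let fullText = "";

    // If the client disconnects, resolve with whatever text accumulated so far.
    const handleAbort = () => clientAbortedHandler(resolve, fullText);
    response.on("close", handleAbort);

    for await (const chunk of stream) {
      const message = chunk?.choices?.[0]; // undefined when `choices` is empty
      const token = message?.delta?.content;

      if (token) {
        fullText += token;
        writeResponseChunk(response, {
          uuid,
          sources: [],
          type: "textResponseChunk",
          textResponse: token,
          close: false,
          error: false,
        });
      }

      // Ending chunk: the finish_reason key is present and neither "" nor null.
      if (
        message?.hasOwnProperty("finish_reason") &&
        message.finish_reason !== "" &&
        message.finish_reason !== null
      ) {
        writeResponseChunk(response, {
          uuid,
          sources,
          type: "textResponseChunk",
          textResponse: "",
          close: true,
          error: false,
        });
        response.removeListener("close", handleAbort);
        resolve(fullText);
        break;
      }
    }
  });
}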
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
       }
 
-      // LocalAi returns '' and others return null.
-      if (message.finish_reason !== "" && message.finish_reason !== null) {
+      // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+      // Either way, the key `finish_reason` must be present to determine ending chunk.
+      if (
+        message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+        message.finish_reason !== "" &&
+        message.finish_reason !== null
+      ) {
         writeResponseChunk(response, {
           uuid,
           sources,
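To make the new guard concrete, here it is lifted into a standalone predicate with illustrative inputs (the name isEndChunk is ours, not the repo's):

// The patched condition as a standalone predicate; `message` is chunk?.choices?.[0].
const isEndChunk = (message) =>
  message?.hasOwnProperty("finish_reason") &&
  message.finish_reason !== "" &&
  message.finish_reason !== null;

isEndChunk({ delta: {}, finish_reason: "stop" });              // true: ending chunk
isEndChunk({ delta: { content: "hi" }, finish_reason: null }); // false: OpenAI mid-stream
isEndChunk({ delta: { content: "hi" }, finish_reason: "" });   // false: LocalAI mid-stream
isEndChunk({ delta: { content: "hi" } });                      // false: key absent (old guard ended the stream here)
isEndChunk(undefined);                                         // undefined, falsy: empty `choices` (old guard threw here)

The hasOwnProperty check is what distinguishes "finish_reason is present but null-ish" from "finish_reason is missing entirely"; the old guard conflated the two and would close the stream on any chunk that simply lacked the key.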
@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
         });
         response.removeListener("close", handleAbort);
         resolve(fullText);
+        break; // Break streaming when a valid finish_reason is first encountered
       }
     }
   });
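A subtlety behind the trailing comment: resolving the promise does not stop the for await loop, so without the break any chunks a backend emits after the ending chunk would still be processed (and potentially written to an already-closed response). A self-contained illustration with a made-up stream:

// resolve() alone does not exit a for await loop; `break` does.
async function* fakeStream() {
  yield { done: false };
  yield { done: true };  // "ending chunk"
  yield { done: false }; // visited anyway if `break` is removed below
}

new Promise(async (resolve) => {
  for await (const item of fakeStream()) {
    console.log("visited", item);
    if (item.done) {
      resolve("finished");
      break; // stop consuming the stream once the end is seen
    }
  }
}).then(console.log);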