Patch Ollama Streaming chunk issues (#500)

Replace stream/sync chats with Langchain interface for now
connect #499
ref: https://github.com/Mintplex-Labs/anything-llm/issues/495#issuecomment-1871476091

Commit 2a1202de54 (parent d7481671ba)
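
For orientation before the diff: the patch drops the hand-rolled fetch calls against Ollama's /api/chat endpoint and routes both sync and streaming chats through LangChain's ChatOllama wrapper piped into a StringOutputParser. A minimal standalone sketch of that pattern follows; the baseUrl and model values are illustrative placeholders, not taken from this commit.

// Sketch of the LangChain-based flow this commit adopts (assumed values marked).
const { ChatOllama } = require("langchain/chat_models/ollama");
const { StringOutputParser } = require("langchain/schema/output_parser");
const { HumanMessage } = require("langchain/schema");

async function demo() {
  const model = new ChatOllama({
    baseUrl: "http://127.0.0.1:11434", // assumption: default local Ollama endpoint
    model: "llama2", // assumption: any locally pulled model tag
    temperature: 0.7,
  });
  const chain = model.pipe(new StringOutputParser());

  // Blocking completion: invoke() resolves to a plain string.
  const text = await chain.invoke([new HumanMessage({ content: "Hello!" })]);
  console.log(text);

  // Streaming completion: stream() yields string chunks to iterate over.
  const stream = await chain.stream([new HumanMessage({ content: "Hello!" })]);
  for await (const chunk of stream) process.stdout.write(chunk);
}

demo().catch(console.error);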
.vscode/settings.json (vendored): 1 addition
@@ -1,6 +1,7 @@
 {
   "cSpell.words": [
     "Dockerized",
+    "Langchain",
     "Ollama",
     "openai",
     "Qdrant",
@@ -1,4 +1,5 @@
 const { chatPrompt } = require("../../chats");
+const { StringOutputParser } = require("langchain/schema/output_parser");

 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 class OllamaAILLM {
@@ -21,6 +22,42 @@ class OllamaAILLM {
     this.embedder = embedder;
   }

+  #ollamaClient({ temperature = 0.07 }) {
+    const { ChatOllama } = require("langchain/chat_models/ollama");
+    return new ChatOllama({
+      baseUrl: this.basePath,
+      model: this.model,
+      temperature,
+    });
+  }
+
+  // For streaming we use Langchain's wrapper to handle weird chunks
+  // or otherwise absorb headaches that can arise from Ollama models
+  #convertToLangchainPrototypes(chats = []) {
+    const {
+      HumanMessage,
+      SystemMessage,
+      AIMessage,
+    } = require("langchain/schema");
+    const langchainChats = [];
+    for (const chat of chats) {
+      switch (chat.role) {
+        case "system":
+          langchainChats.push(new SystemMessage({ content: chat.content }));
+          break;
+        case "user":
+          langchainChats.push(new HumanMessage({ content: chat.content }));
+          break;
+        case "assistant":
+          langchainChats.push(new AIMessage({ content: chat.content }));
+          break;
+        default:
+          break;
+      }
+    }
+    return langchainChats;
+  }
+
   streamingEnabled() {
     return "streamChat" in this && "streamGetChatCompletion" in this;
   }
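
The #convertToLangchainPrototypes helper added above maps the app's {role, content} chat objects onto LangChain's message classes. As a sketch (not part of the commit), a typical compressed history would convert roughly like this:

// Illustration of what the new mapping produces for a typical history.
const {
  HumanMessage,
  SystemMessage,
  AIMessage,
} = require("langchain/schema");

const history = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "Summarize the docs." },
  { role: "assistant", content: "Sure, here is a summary..." },
];

// The helper would yield the equivalent of:
const converted = [
  new SystemMessage({ content: history[0].content }),
  new HumanMessage({ content: history[1].content }),
  new AIMessage({ content: history[2].content }),
];
// Any chat with an unrecognized role is silently skipped (the default: branch).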
@@ -63,37 +100,21 @@ Context:
   }

   async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
-    const textResponse = await fetch(`${this.basePath}/api/chat`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        model: this.model,
-        stream: false,
-        options: {
-          temperature: Number(workspace?.openAiTemp ?? 0.7),
-        },
-        messages: await this.compressMessages(
-          {
-            systemPrompt: chatPrompt(workspace),
-            userPrompt: prompt,
-            chatHistory,
-          },
-          rawHistory
-        ),
-      }),
-    })
-      .then((res) => {
-        if (!res.ok)
-          throw new Error(`Ollama:sendChat ${res.status} ${res.statusText}`);
-        return res.json();
-      })
-      .then((data) => data?.message?.content)
-      .catch((e) => {
-        console.error(e);
-        throw new Error(`Ollama::sendChat failed with: ${error.message}`);
-      });
+    const messages = await this.compressMessages(
+      {
+        systemPrompt: chatPrompt(workspace),
+        userPrompt: prompt,
+        chatHistory,
+      },
+      rawHistory
+    );
+
+    const model = this.#ollamaClient({
+      temperature: Number(workspace?.openAiTemp ?? 0.7),
+    });
+    const textResponse = await model
+      .pipe(new StringOutputParser())
+      .invoke(this.#convertToLangchainPrototypes(messages));

     if (!textResponse.length)
       throw new Error(`Ollama::sendChat text response was empty.`);
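
Worth noting about the rewritten sendChat above: piping the ChatOllama model through StringOutputParser is what keeps textResponse a plain string, so the !textResponse.length guard still behaves as before. A short illustrative contrast (an assumed helper, not from the commit):

// Illustrative only: contrast invoke() with and without the parser.
// `model` is a ChatOllama instance and `messages` a LangChain message array,
// as in the diff above.
const { StringOutputParser } = require("langchain/schema/output_parser");

async function compare(model, messages) {
  const asMessage = await model.invoke(messages);
  // asMessage is an AIMessage; its text lives on asMessage.content

  const asString = await model.pipe(new StringOutputParser()).invoke(messages);
  // asString is a plain string, so checks like !asString.length apply directly

  return { asMessage, asString };
}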
@@ -102,63 +123,29 @@ Context:
   }

   async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
-    const response = await fetch(`${this.basePath}/api/chat`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        model: this.model,
-        stream: true,
-        options: {
-          temperature: Number(workspace?.openAiTemp ?? 0.7),
-        },
-        messages: await this.compressMessages(
-          {
-            systemPrompt: chatPrompt(workspace),
-            userPrompt: prompt,
-            chatHistory,
-          },
-          rawHistory
-        ),
-      }),
-    }).catch((e) => {
-      console.error(e);
-      throw new Error(`Ollama:streamChat ${error.message}`);
-    });
-
-    return { type: "ollamaStream", response };
+    const messages = await this.compressMessages(
+      {
+        systemPrompt: chatPrompt(workspace),
+        userPrompt: prompt,
+        chatHistory,
+      },
+      rawHistory
+    );
+
+    const model = this.#ollamaClient({
+      temperature: Number(workspace?.openAiTemp ?? 0.7),
+    });
+    const stream = await model
+      .pipe(new StringOutputParser())
+      .stream(this.#convertToLangchainPrototypes(messages));
+    return stream;
   }

   async getChatCompletion(messages = null, { temperature = 0.7 }) {
-    const textResponse = await fetch(`${this.basePath}/api/chat`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        model: this.model,
-        messages,
-        stream: false,
-        options: {
-          temperature,
-        },
-      }),
-    })
-      .then((res) => {
-        if (!res.ok)
-          throw new Error(
-            `Ollama:getChatCompletion ${res.status} ${res.statusText}`
-          );
-        return res.json();
-      })
-      .then((data) => data?.message?.content)
-      .catch((e) => {
-        console.error(e);
-        throw new Error(
-          `Ollama::getChatCompletion failed with: ${error.message}`
-        );
-      });
+    const model = this.#ollamaClient({ temperature });
+    const textResponse = await model
+      .pipe(new StringOutputParser())
+      .invoke(this.#convertToLangchainPrototypes(messages));

     if (!textResponse.length)
       throw new Error(`Ollama::getChatCompletion text response was empty.`);
@@ -167,25 +154,11 @@ Context:
   }

   async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
-    const response = await fetch(`${this.basePath}/api/chat`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        model: this.model,
-        stream: true,
-        messages,
-        options: {
-          temperature,
-        },
-      }),
-    }).catch((e) => {
-      console.error(e);
-      throw new Error(`Ollama:streamGetChatCompletion ${error.message}`);
-    });
-
-    return { type: "ollamaStream", response };
+    const model = this.#ollamaClient({ temperature });
+    const stream = await model
+      .pipe(new StringOutputParser())
+      .stream(this.#convertToLangchainPrototypes(messages));
+    return stream;
   }

   // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
@@ -232,46 +232,19 @@ function handleStreamResponses(response, stream, responseProps) {
     });
   }

-  if (stream?.type === "ollamaStream") {
-    return new Promise(async (resolve) => {
-      let fullText = "";
-      for await (const dataChunk of stream.response.body) {
-        const chunk = JSON.parse(Buffer.from(dataChunk).toString());
-        fullText += chunk.message.content;
-        writeResponseChunk(response, {
-          uuid,
-          sources: [],
-          type: "textResponseChunk",
-          textResponse: chunk.message.content,
-          close: false,
-          error: false,
-        });
-      }
-
-      writeResponseChunk(response, {
-        uuid,
-        sources,
-        type: "textResponseChunk",
-        textResponse: "",
-        close: true,
-        error: false,
-      });
-      resolve(fullText);
-    });
-  }
-
-  // If stream is not a regular OpenAI Stream (like if using native model)
+  // If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces)
   // we can just iterate the stream content instead.
   if (!stream.hasOwnProperty("data")) {
     return new Promise(async (resolve) => {
       let fullText = "";
       for await (const chunk of stream) {
-        fullText += chunk.content;
+        const content = chunk.hasOwnProperty("content") ? chunk.content : chunk;
+        fullText += content;
         writeResponseChunk(response, {
           uuid,
           sources: [],
           type: "textResponseChunk",
-          textResponse: chunk.content,
+          textResponse: content,
           close: false,
           error: false,
         });
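
Because the Ollama provider now returns the LangChain stream directly (piped through StringOutputParser), chunks reaching this generic branch are plain strings rather than objects with a content field, which is what the new hasOwnProperty("content") guard normalizes. A small consumer sketch of the same idea (assumed function name, not from the commit):

// Sketch: accumulate either chunk shape the handler may receive.
async function collectStream(stream) {
  let fullText = "";
  for await (const chunk of stream) {
    // LangChain chains piped through StringOutputParser yield strings;
    // other providers may yield objects carrying a `content` property.
    const content = typeof chunk === "string" ? chunk : chunk.content;
    fullText += content;
  }
  return fullText;
}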