// Mirror of https://github.com/Mintplex-Labs/anything-llm.git
// Synced 2024-11-15 10:50:31 +01:00 (910eb36cfe)
// "fix query mode always responding with refusal message on develop api openai compatible endpoints"
// 478 lines, 13 KiB, JavaScript
const { v4: uuidv4 } = require("uuid");
|
|
const { DocumentManager } = require("../DocumentManager");
|
|
const { WorkspaceChats } = require("../../models/workspaceChats");
|
|
const { getVectorDbClass, getLLMProvider } = require("../helpers");
|
|
const { writeResponseChunk } = require("../helpers/chat/responses");
|
|
const { chatPrompt, sourceIdentifier } = require("./index");
|
|
|
|
const { PassThrough } = require("stream");
|
|
|
|
/**
 * Runs a single, non-streaming chat turn against a workspace and returns the
 * result in the OpenAI-style envelope produced by `formatJSON`.
 *
 * Steps, in order:
 *  1. In "query" mode with no namespace or zero embeddings, store and return the
 *     refusal message (finish_reason "abort").
 *  2. Collect pinned documents, then run a similarity search (only when the
 *     namespace has embeddings), excluding the pinned documents' identifiers.
 *  3. If the similarity search reports a `message`, return it as an error
 *     (finish_reason "abort").
 *  4. In "query" mode with no context at all, store and return the refusal
 *     message (finish_reason "no_content").
 *  5. Otherwise compress the messages, request a completion, store the chat and
 *     return it (finish_reason "stop"), or return an error envelope when the
 *     completion is empty (finish_reason "no_content").
 *
 * @param {Object} params
 * @param {Object} params.workspace - Workspace record. `slug` and `id` are read
 *   unconditionally; `chatMode`, `chatProvider`, `chatModel`, `similarityThreshold`,
 *   `topN`, `openAiTemp` and `queryRefusalResponse` are optional.
 * @param {string|null} [params.systemPrompt=null] - Used instead of `chatPrompt(workspace)` when not nullish.
 * @param {Array} [params.history=[]] - Passed to `compressMessages` as `chatHistory`.
 * @param {string|null} [params.prompt=null] - The user input for this turn.
 * @param {number|null} [params.temperature=null] - Falls back to `workspace.openAiTemp`, then `LLMConnector.defaultTemp`.
 * @returns {Promise<Object>} The object returned by `formatJSON`.
 */
async function chatSync({
  workspace,
  systemPrompt = null,
  history = [],
  prompt = null,
  temperature = null,
}) {
  const uuid = uuidv4();
  const chatMode = workspace?.chatMode ?? "chat";
  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // Shared by both query-mode early exits: store the refusal (flagged
  // `include: false`) and hand it back as a normal text response.
  const refuseQuery = async (finish_reason) => {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";

    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });

    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: null,
        textResponse,
      },
      { model: workspace.slug, finish_reason }
    );
  };

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if (chatMode === "query" && (!hasVectorizedSpace || embeddingsCount === 0)) {
    return refuseQuery("abort");
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  const pinnedTexts = [];
  const pinnedSources = [];
  const pinnedDocIdentifiers = [];
  const pinnedDocs = await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  }).pinnedDocs();

  for (const doc of pinnedDocs) {
    const { pageContent, ...metadata } = doc;
    pinnedDocIdentifiers.push(sourceIdentifier(doc));
    pinnedTexts.push(pageContent);
    // Only the first 1,000 characters are exposed as the source preview; any
    // `text` key in the metadata still takes precedence because it is spread last.
    pinnedSources.push({
      text: `${pageContent.slice(0, 1_000)}...continued on in source document...`,
      ...metadata,
    });
  }

  // Skip the search entirely when there is nothing embedded to search over.
  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: prompt,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // Failed similarity search if it was run at all and failed.
  if (vectorSearchResults.message) {
    return formatJSON(
      {
        id: uuid,
        type: "abort",
        textResponse: null,
        sources: [],
        close: true,
        error: vectorSearchResults.message,
      },
      { model: workspace.slug, finish_reason: "abort" }
    );
  }

  // For OpenAI Compatible chats, we cannot do backfilling so we simply aggregate results here.
  const contextTexts = [...pinnedTexts, ...vectorSearchResults.contextTexts];
  const sources = [...pinnedSources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks are found from search or pins - do not
  // let the LLM try to hallucinate a response or use general knowledge and exit early.
  if (chatMode === "query" && contextTexts.length === 0) {
    return refuseQuery("no_content");
  }

  // Compress & Assemble message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages({
    systemPrompt: systemPrompt ?? chatPrompt(workspace),
    userPrompt: prompt,
    contextTexts,
    chatHistory: history,
  });

  // Send the text completion.
  const textResponse = await LLMConnector.getChatCompletion(messages, {
    temperature:
      temperature ?? workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  });

  if (!textResponse) {
    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: "No text completion could be completed with this input.",
        textResponse: null,
      },
      { model: workspace.slug, finish_reason: "no_content" }
    );
  }

  const { chat } = await WorkspaceChats.new({
    workspaceId: workspace.id,
    prompt: String(prompt),
    response: { text: textResponse, sources, type: chatMode },
  });

  return formatJSON(
    {
      id: uuid,
      type: "textResponse",
      close: true,
      error: null,
      // NOTE(review): guarded in case the chat could not be stored and no record
      // comes back - confirm against WorkspaceChats.new. The completion itself
      // is still valid and should be returned either way.
      chatId: chat?.id,
      textResponse,
      sources,
    },
    { model: workspace.slug, finish_reason: "stop" }
  );
}
|
|
|
|
/**
 * Runs a single streaming chat turn against a workspace and writes the result
 * to `response` as server-sent events whose payloads are shaped by `formatJSON`
 * with `chunked: true`.
 *
 * The decision flow matches the non-streaming path: query-mode refusal when the
 * workspace has no embeddings, pinned documents + similarity search, abort on a
 * search error, query-mode refusal when no context was found, then the LLM call.
 * If the provider reports that streaming is disabled, a single error chunk with
 * finish_reason "streaming_disabled" is written instead.
 *
 * This function only writes to `response`; it never ends it.
 *
 * @param {Object} params
 * @param {Object} params.workspace - Workspace record. `slug` and `id` are read unconditionally.
 * @param {Object} params.response - Writable HTTP response; receives `data: <json>\n\n` events.
 * @param {string|null} [params.systemPrompt=null] - Used instead of `chatPrompt(workspace)` when not nullish.
 * @param {Array} [params.history=[]] - Passed to `compressMessages` as `chatHistory`.
 * @param {string|null} [params.prompt=null] - The user input for this turn.
 * @param {number|null} [params.temperature=null] - Falls back to `workspace.openAiTemp`, then `LLMConnector.defaultTemp`.
 * @returns {Promise<void>}
 */
async function streamChat({
  workspace,
  response,
  systemPrompt = null,
  history = [],
  prompt = null,
  temperature = null,
}) {
  const SSE_DATA_PREFIX = "data: ";
  const uuid = uuidv4();
  const chatMode = workspace?.chatMode ?? "chat";
  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // Writes one already-complete chunk straight to the client in OpenAI format.
  const writeChunk = (chat, finish_reason) =>
    writeResponseChunk(
      response,
      formatJSON(chat, { chunked: true, model: workspace.slug, finish_reason })
    );

  // Shared by both query-mode early exits: store the refusal (flagged
  // `include: false`) and stream it back as a single text chunk.
  const refuseQuery = async (finish_reason) => {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";

    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });

    writeChunk(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: null,
        textResponse,
      },
      finish_reason
    );
  };

  // We don't want to write a new method for every LLM to support OpenAI calls.
  // Instead the provider's stream handler writes into this passthrough, and each
  // chunk - which arrives in the AnythingLLM response chunk schema - is rewritten
  // to the OpenAI format before being forwarded to the real response.
  const responseInterceptor = new PassThrough({});
  responseInterceptor.on("data", (chunk) => {
    // Events are expected as `data: <json>\n\n` (the same framing written below).
    // A buffer may hold several events, and the generated text may itself contain
    // "data: ", so split on the blank-line delimiter and strip only the first
    // prefix of each event. A raw blank line cannot occur inside a JSON payload.
    for (const event of chunk.toString().split("\n\n")) {
      if (event.trim() === "") continue;

      try {
        const prefixAt = event.indexOf(SSE_DATA_PREFIX);
        if (prefixAt === -1) {
          throw new Error(
            `Stream chunk is missing the "${SSE_DATA_PREFIX}" prefix.`
          );
        }

        const originalData = JSON.parse(
          event.slice(prefixAt + SSE_DATA_PREFIX.length)
        );
        const modified = formatJSON(originalData, {
          chunked: true,
          model: workspace.slug,
        }); // rewrite to OpenAI format
        response.write(`data: ${JSON.stringify(modified)}\n\n`);
      } catch (e) {
        // A malformed event is logged and skipped so the rest of the stream
        // still reaches the client.
        console.error(e);
      }
    }
  });

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if (chatMode === "query" && (!hasVectorizedSpace || embeddingsCount === 0)) {
    await refuseQuery("abort");
    return;
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  const pinnedTexts = [];
  const pinnedSources = [];
  const pinnedDocIdentifiers = [];
  const pinnedDocs = await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  }).pinnedDocs();

  for (const doc of pinnedDocs) {
    const { pageContent, ...metadata } = doc;
    pinnedDocIdentifiers.push(sourceIdentifier(doc));
    pinnedTexts.push(pageContent);
    // Only the first 1,000 characters are exposed as the source preview; any
    // `text` key in the metadata still takes precedence because it is spread last.
    pinnedSources.push({
      text: `${pageContent.slice(0, 1_000)}...continued on in source document...`,
      ...metadata,
    });
  }

  // Skip the search entirely when there is nothing embedded to search over.
  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: prompt,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // Failed similarity search if it was run at all and failed.
  if (vectorSearchResults.message) {
    writeChunk(
      {
        id: uuid,
        type: "abort",
        textResponse: null,
        sources: [],
        close: true,
        error: vectorSearchResults.message,
      },
      "abort"
    );
    return;
  }

  // For OpenAI Compatible chats, we cannot do backfilling so we simply aggregate results here.
  const contextTexts = [...pinnedTexts, ...vectorSearchResults.contextTexts];
  const sources = [...pinnedSources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks are found from search or pins - do not
  // let the LLM try to hallucinate a response or use general knowledge and exit early.
  if (chatMode === "query" && contextTexts.length === 0) {
    await refuseQuery("no_content");
    return;
  }

  // Compress & Assemble message to ensure prompt passes token limit with room for response
  // and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages({
    systemPrompt: systemPrompt ?? chatPrompt(workspace),
    userPrompt: prompt,
    contextTexts,
    chatHistory: history,
  });

  if (!LLMConnector.streamingEnabled()) {
    writeChunk(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: "Streaming is not available for the connected LLM Provider",
        textResponse: null,
      },
      "streaming_disabled"
    );
    return;
  }

  const stream = await LLMConnector.streamGetChatCompletion(messages, {
    temperature:
      temperature ?? workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  });
  // The provider writes into the interceptor, not the response, so every
  // streamed chunk is translated by the "data" listener above.
  const completeText = await LLMConnector.handleStream(
    responseInterceptor,
    stream,
    {
      uuid,
      sources,
    }
  );

  const finalChunk = {
    uuid,
    type: "finalizeResponseStream",
    close: true,
    error: false,
    textResponse: "",
  };

  // Only store the exchange when the model actually produced text.
  if (completeText?.length > 0) {
    const { chat } = await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: { text: completeText, sources, type: chatMode },
    });
    // NOTE(review): guarded in case the chat could not be stored and no record
    // comes back - confirm against WorkspaceChats.new. The closing chunk must
    // still be sent so the client sees the end of the stream.
    finalChunk.chatId = chat?.id;
  }

  writeChunk(finalChunk, "stop");
  return;
}
|
|
|
|
/**
 * Converts an AnythingLLM response chunk into an OpenAI-style chat completion
 * object.
 *
 * Only `uuid`/`id` and `textResponse` are read from `chat`; every other field
 * on it (sources, error, type, ...) is not carried over.
 *
 * @param {Object} chat - AnythingLLM response chunk.
 * @param {string} [chat.uuid] - Preferred identifier; `chat.id` is used when this is nullish.
 * @param {string} [chat.id] - Fallback identifier.
 * @param {string|null} [chat.textResponse] - Becomes the assistant message content.
 * @param {Object} [options]
 * @param {boolean} [options.chunked=false] - When true the content goes under `delta`, otherwise under `message`.
 * @param {string} [options.model] - Value for the `model` field.
 * @param {string|null} [options.finish_reason=null] - Value for the choice's `finish_reason`.
 * @returns {Object} The OpenAI-style completion payload.
 */
function formatJSON(
  chat,
  { chunked = false, model, finish_reason = null } = {}
) {
  const contentKey = chunked ? "delta" : "message";

  return {
    id: chat.uuid ?? chat.id,
    object: "chat.completion",
    // OpenAI defines `created` as a Unix timestamp in seconds, not milliseconds.
    created: Math.floor(Date.now() / 1000),
    model,
    choices: [
      {
        // Position of this choice in the list; only one choice is ever returned.
        index: 0,
        [contentKey]: {
          role: "assistant",
          content: chat.textResponse,
        },
        logprobs: null,
        finish_reason,
      },
    ],
  };
}
|
|
|
|
// Exposes the non-streaming and streaming chat handlers under one namespace.
module.exports.OpenAICompatibleChat = { chatSync, streamChat };
|