Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-13 02:00:10 +01:00)

Commit 318025baee: Merge branch 'master' of github.com:Mintplex-Labs/anything-llm
@@ -30,7 +30,11 @@ export default function GeminiLLMOptions({ settings }) {
            required={true}
            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
          >
-            {["gemini-pro", "gemini-1.5-pro-latest"].map((model) => {
+            {[
+              "gemini-pro",
+              "gemini-1.5-pro-latest",
+              "gemini-1.5-flash-latest",
+            ].map((model) => {
              return (
                <option key={model} value={model}>
                  {model}
@@ -10,7 +10,7 @@ export const DISABLED_PROVIDERS = [
 ];
 const PROVIDER_DEFAULT_MODELS = {
   openai: [],
-  gemini: ["gemini-pro", "gemini-1.5-pro-latest"],
+  gemini: ["gemini-pro", "gemini-1.5-pro-latest", "gemini-1.5-flash-latest"],
   anthropic: [
     "claude-instant-1.2",
     "claude-2.0",
@@ -17,8 +17,12 @@ class GeminiLLM {
     this.gemini = genAI.getGenerativeModel(
       { model: this.model },
       {
-        // Gemini-1.5-pro is only available on the v1beta API.
-        apiVersion: this.model === "gemini-1.5-pro-latest" ? "v1beta" : "v1",
+        // Gemini-1.5-pro and Gemini-1.5-flash are only available on the v1beta API.
+        apiVersion:
+          this.model === "gemini-1.5-pro-latest" ||
+          this.model === "gemini-1.5-flash-latest"
+            ? "v1beta"
+            : "v1",
       }
     );
     this.limits = {

@@ -95,7 +99,11 @@ class GeminiLLM {
   }

   isValidChatCompletionModel(modelName = "") {
-    const validModels = ["gemini-pro", "gemini-1.5-pro-latest"];
+    const validModels = [
+      "gemini-pro",
+      "gemini-1.5-pro-latest",
+      "gemini-1.5-flash-latest",
+    ];
     return validModels.includes(modelName);
   }

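Both Gemini hunks above hard-code the same rule: the 1.5-series models are only reachable through the v1beta API. Purely as an illustration (not part of the commit), the same selection can be written as a lookup table; the model ids are the three listed in the diff, and the helper name is invented here.

// Hypothetical helper, not from the repo. Model ids are the three in the diff.
const GEMINI_API_VERSIONS = {
  "gemini-pro": "v1",
  "gemini-1.5-pro-latest": "v1beta",
  "gemini-1.5-flash-latest": "v1beta",
};

// Fall back to the stable v1 endpoint for any model id not listed above.
function geminiApiVersion(model) {
  return GEMINI_API_VERSIONS[model] ?? "v1";
}

console.log(geminiApiVersion("gemini-1.5-flash-latest")); // "v1beta"

A table like this keeps the model list and the API-version rule in one place, so adding a fourth model would be a one-line change.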
@@ -1,7 +1,6 @@
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
-  writeResponseChunk,
-  clientAbortedHandler,
+  handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");

 class LiteLLM {

@@ -113,45 +112,7 @@ class LiteLLM {
   }

   handleStream(response, stream, responseProps) {
-    const { uuid = uuidv4(), sources = [] } = responseProps;
-
-    return new Promise(async (resolve) => {
-      let fullText = "";
-
-      const handleAbort = () => clientAbortedHandler(resolve, fullText);
-      response.on("close", handleAbort);
-
-      for await (const chunk of stream) {
-        const message = chunk?.choices?.[0];
-        const token = message?.delta?.content;
-
-        if (token) {
-          fullText += token;
-          writeResponseChunk(response, {
-            uuid,
-            sources: [],
-            type: "textResponseChunk",
-            textResponse: token,
-            close: false,
-            error: false,
-          });
-        }
-
-        // LiteLLM does not give a finish reason in stream until the final chunk
-        if (message.finish_reason || message.finish_reason === "stop") {
-          writeResponseChunk(response, {
-            uuid,
-            sources,
-            type: "textResponseChunk",
-            textResponse: "",
-            close: true,
-            error: false,
-          });
-          response.removeListener("close", handleAbort);
-          resolve(fullText);
-        }
-      }
-    });
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
   }

 // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
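The hunk above deletes LiteLLM's hand-rolled streaming loop in favor of the shared handleDefaultStreamResponseV2 helper. For orientation, here is a toy sketch of the OpenAI-compatible chunk shape the removed loop was reading; the values are invented, only the field names come from the code above.

// Invented values; field names match what the deleted loop read.
const exampleChunk = {
  choices: [
    {
      delta: { content: "partial token text" }, // streamed token fragment
      finish_reason: null, // stays null/"" until the final chunk (e.g. "stop")
    },
  ],
};

const message = exampleChunk.choices?.[0];
console.log(message?.delta?.content); // "partial token text"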
@@ -151,16 +151,27 @@ async function chatWithWorkspace(
     };
   }

-  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get current search?
+  // This is to give the ability of the LLM to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant
+  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question - that is visible in the history so it logically makes sense
+  // and does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR; reduces GitHub issues for "LLM citing document that has no answer in it" while keep answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
   sources = [...sources, ...vectorSearchResults.sources];

-  // If in query mode and no sources are found from the vector search and no pinned documents, do not
+  // If in query mode and no context chunks are found from search, backfill, or pins - do not
   // let the LLM try to hallucinate a response or use general knowledge and exit early
-  if (
-    chatMode === "query" &&
-    vectorSearchResults.sources.length === 0 &&
-    pinnedDocIdentifiers.length === 0
-  ) {
+  if (chatMode === "query" && contextTexts.length === 0) {
     return {
       id: uuid,
       type: "textResponse",

@@ -224,9 +235,7 @@ async function recentChatHistory({
   workspace,
   thread = null,
   messageLimit = 20,
-  chatMode = null,
 }) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
   const rawHistory = (
     await WorkspaceChats.where(
       {
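The long comment in the hunk above is the key reasoning step: backfilled chunks are fed to the LLM (contextTexts) but deliberately left out of the citation list (sources). A toy sketch of that split follows; the values are invented and the filledSources shape matches what fillSourceWindow documents later in this diff.

// Invented example data: one fresh vector-search hit plus one chunk backfilled
// from chat history by fillSourceWindow.
const vectorSearchResults = {
  sources: [{ id: "c1", text: "fresh chunk", score: 0.9 }],
};
const filledSources = {
  sources: [
    { id: "c1", text: "fresh chunk", score: 0.9 },
    { id: "c7", text: "backfilled chunk from history", score: 0.8 },
  ],
  contextTexts: ["fresh chunk", "backfilled chunk from history"],
};

let contextTexts = [];
let sources = [];
contextTexts = [...contextTexts, ...filledSources.contextTexts]; // the LLM sees both chunks
sources = [...sources, ...vectorSearchResults.sources]; // citations only show the fresh hit
console.log(contextTexts.length, sources.length); // 2 1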
@@ -100,7 +100,6 @@ async function streamChatWithWorkspace(
    workspace,
    thread,
    messageLimit,
-    chatMode,
  });

  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
@@ -1,3 +1,5 @@
+const { sourceIdentifier } = require("../../chats");
+const { safeJsonParse } = require("../../http");
 const { TokenManager } = require("../tiktoken");
 const { convertToPromptHistory } = require("./responses");

@@ -343,7 +345,104 @@ function cannonball({
   return truncatedText;
 }

+/**
+ * Fill the sources window with the priority of
+ * 1. Pinned documents (handled prior to function)
+ * 2. VectorSearch results
+ * 3. prevSources in chat history - starting from most recent.
+ *
+ * Ensuring the window always has the desired amount of sources so that followup questions
+ * in any chat mode have relevant sources, but not infinite sources. This function is used during chatting
+ * and allows follow-up questions within a query chat that otherwise would have zero sources and would fail.
+ * The added benefit is that during regular RAG chat, we have better coherence of citations that otherwise would
+ * also yield no results with no need for a ReRanker to run and take much longer to return a response.
+ *
+ * The side effect of this is follow-up unrelated questions now have citations that would look totally irrelevant, however
+ * we would rather optimize on the correctness of a response vs showing extraneous sources during a response. Given search
+ * results always take a priority a good unrelated question that produces RAG results will still function as desired and due to previous
+ * history backfill sources "changing context" mid-chat is handled appropriately.
+ * example:
+ * ---previous implementation---
+ * prompt 1: "What is anythingllm?" -> possibly get 4 good sources
+ * prompt 2: "Tell me some features" -> possible get 0 - 1 maybe relevant source + previous answer response -> bad response due to bad context mgmt
+ * ---next implementation---
+ * prompt 1: "What is anythingllm?" -> possibly get 4 good sources
+ * prompt 2: "Tell me some features" -> possible get 0 - 1 maybe relevant source + previous answer response -> backfill with 3 good sources from previous -> much better response
+ *
+ * @param {Object} config - params to call
+ * @param {object} config.nDocs = fill size of the window
+ * @param {object} config.searchResults = vector similarityResponse results for .sources
+ * @param {object[]} config.history - rawHistory of chat containing sources
+ * @param {string[]} config.filterIdentifiers - Pinned document identifiers to prevent duplicate context
+ * @returns {{
+ *  contextTexts: string[],
+ *  sources: object[],
+ * }} - Array of sources that should be added to window
+ */
+function fillSourceWindow({
+  nDocs = 4, // Number of documents
+  searchResults = [], // Sources from similarity search
+  history = [], // Raw history
+  filterIdentifiers = [], // pinned document sources
+} = config) {
+  const sources = [...searchResults];
+
+  if (sources.length >= nDocs || history.length === 0) {
+    return {
+      sources,
+      contextTexts: sources.map((src) => src.text),
+    };
+  }
+
+  const log = (text, ...args) => {
+    console.log(`\x1b[36m[fillSourceWindow]\x1b[0m ${text}`, ...args);
+  };
+
+  log(
+    `Need to backfill ${nDocs - searchResults.length} chunks to fill in the source window for RAG!`
+  );
+  const seenChunks = new Set(searchResults.map((source) => source.id));
+
+  // We need to reverse again because we need to iterate from bottom of array (most recent chats)
+  // Looking at this function by itself you may think that this loop could be extreme for long history chats,
+  // but this was already handled where `history` we derived. This comes from `recentChatHistory` which
+  // includes a limit for history (default: 20). So this loop does not look as extreme as on first glance.
+  for (const chat of history.reverse()) {
+    if (sources.length >= nDocs) {
+      log(
+        `Citations backfilled to ${nDocs} references from ${searchResults.length} original citations.`
+      );
+      break;
+    }
+
+    const chatSources =
+      safeJsonParse(chat.response, { sources: [] })?.sources || [];
+    if (!chatSources?.length || !Array.isArray(chatSources)) continue;
+
+    const validSources = chatSources.filter((source) => {
+      return (
+        filterIdentifiers.includes(sourceIdentifier(source)) == false && // source cannot be in current pins
+        source.hasOwnProperty("score") && // source cannot have come from a pinned document that was previously pinned
+        source.hasOwnProperty("text") && // source has a valid text property we can use
+        seenChunks.has(source.id) == false // is unique
+      );
+    });
+
+    for (const validSource of validSources) {
+      if (sources.length >= nDocs) break;
+      sources.push(validSource);
+      seenChunks.add(validSource.id);
+    }
+  }
+
+  return {
+    sources,
+    contextTexts: sources.map((src) => src.text),
+  };
+}
+
 module.exports = {
   messageArrayCompressor,
   messageStringCompressor,
+  fillSourceWindow,
 };
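A usage sketch of fillSourceWindow, assuming it runs inside the repo so its safeJsonParse and sourceIdentifier dependencies resolve; the require path and every value are illustrative, not taken from the commit.

// Require path assumed/hypothetical; toy objects only - real source entries
// carry more metadata than id/text/score.
const { fillSourceWindow } = require("./server/utils/helpers/chat");

const priorTurn = {
  // recentChatHistory() rows store the assistant reply as JSON text,
  // which is where earlier citations live.
  response: JSON.stringify({
    sources: [{ id: "chunk-7", text: "older but still relevant chunk", score: 0.88 }],
  }),
};

const { sources, contextTexts } = fillSourceWindow({
  nDocs: 4, // desired window size, typically workspace.topN
  searchResults: [{ id: "chunk-1", text: "fresh hit", score: 0.91 }],
  history: [priorTurn],
  filterIdentifiers: [], // pinned-document identifiers to exclude
});

console.log(sources.length); // 2: the fresh hit plus backfilled chunk-7
console.log(contextTexts); // ["fresh hit", "older but still relevant chunk"]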
@@ -38,8 +38,13 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
          });
        }

-        // LocalAi returns '' and others return null.
-        if (message.finish_reason !== "" && message.finish_reason !== null) {
+        // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
+        // Either way, the key `finish_reason` must be present to determine ending chunk.
+        if (
+          message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
+          message.finish_reason !== "" &&
+          message.finish_reason !== null
+        ) {
          writeResponseChunk(response, {
            uuid,
            sources,

@@ -50,6 +55,7 @@ function handleDefaultStreamResponseV2(response, stream, responseProps) {
          });
          response.removeListener("close", handleAbort);
          resolve(fullText);
+          break; // Break streaming when a valid finish_reason is first encountered
        }
      }
    });
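The widened guard above only treats a chunk as final when the finish_reason key is present and is neither an empty string nor null. Restated as a standalone predicate (illustrative only, the name is invented here):

// Mirrors the condition in the diff: the key must exist and be neither "" nor null.
function isFinalChunk(message) {
  return (
    message?.hasOwnProperty("finish_reason") &&
    message.finish_reason !== "" &&
    message.finish_reason !== null
  );
}

console.log(isFinalChunk({ delta: { content: "hi" }, finish_reason: null })); // false
console.log(isFinalChunk({ delta: {}, finish_reason: "stop" })); // true
console.log(isFinalChunk(undefined)); // undefined (falsy), so streaming continues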
@@ -526,7 +526,11 @@ function supportedTranscriptionProvider(input = "") {
 }

 function validGeminiModel(input = "") {
-  const validModels = ["gemini-pro", "gemini-1.5-pro-latest"];
+  const validModels = [
+    "gemini-pro",
+    "gemini-1.5-pro-latest",
+    "gemini-1.5-flash-latest",
+  ];
   return validModels.includes(input)
     ? null
     : `Invalid Model type. Must be one of ${validModels.join(", ")}.`;