From 894f727903d8cabc785d5200411665a338ce3931 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Wed, 1 May 2024 13:32:52 -0700 Subject: [PATCH] Remove restrictions on pinned documents to use more context (#1248) * Remove restrictions on pinned documents to use more context * update comment --- server/utils/chats/embed.js | 5 ++--- server/utils/chats/index.js | 5 ++--- server/utils/chats/stream.js | 6 +++++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js index 94df306f..533ea0c3 100644 --- a/server/utils/chats/embed.js +++ b/server/utils/chats/embed.js @@ -77,11 +77,10 @@ async function streamChatWithForEmbed( chatMode ); - // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search - // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window. + // See stream.js comment for more information on this implementation. await new DocumentManager({ workspace: embed.workspace, - maxTokens: LLMConnector.limits.system, + maxTokens: LLMConnector.promptWindowLimit(), }) .pinnedDocs() .then((pinnedDocs) => { diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js index 87d96c47..38ce6c9b 100644 --- a/server/utils/chats/index.js +++ b/server/utils/chats/index.js @@ -89,11 +89,10 @@ async function chatWithWorkspace( chatMode, }); - // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search - // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window. + // See stream.js comment for more information on this implementation. 
await new DocumentManager({ workspace, - maxTokens: LLMConnector.limits.system, + maxTokens: LLMConnector.promptWindowLimit(), }) .pinnedDocs() .then((pinnedDocs) => { diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js index 0e471161..57f32666 100644 --- a/server/utils/chats/stream.js +++ b/server/utils/chats/stream.js @@ -105,9 +105,13 @@ async function streamChatWithWorkspace( // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window. + // However we limit the maximum of appended context to 80% of its overall size, mostly because if it expands beyond this + // it will undergo prompt compression anyway to make it work. If there is so much pinned that the context here is bigger than + // what the model can support - it would get compressed anyway and that really is not the point of pinning. It is really best + // suited for high-context models. await new DocumentManager({ workspace, - maxTokens: LLMConnector.limits.system, + maxTokens: LLMConnector.promptWindowLimit(), }) .pinnedDocs() .then((pinnedDocs) => {