Mirror of https://github.com/Mintplex-Labs/anything-llm.git, synced 2024-11-11 01:10:11 +01:00
Remove restrictions on pinned documents to use more context (#1248)
* Remove restrictions on pinned documents to use more context
* update comment
parent e61dfd80a5
commit 894f727903
@@ -77,11 +77,10 @@ async function streamChatWithForEmbed(
     chatMode
   );
 
-  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
-  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // See stream.js comment for more information on this implementation.
   await new DocumentManager({
     workspace: embed.workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {
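The functional change in this hunk (and in the two below) is the token budget handed to DocumentManager: pinned documents may now occupy the model's full context window via LLMConnector.promptWindowLimit() instead of the much smaller fixed cap in LLMConnector.limits.system. A minimal sketch of how a pinned-document selector might spend that budget, assuming a hypothetical estimateTokens helper and a { pageContent } document shape (neither appears in this diff):

// Illustrative sketch only, not the repository's actual DocumentManager.
// estimateTokens() and the { pageContent } shape are assumptions for this example.
function selectPinnedDocs(pinnedDocs, maxTokens, estimateTokens) {
  const selected = [];
  let used = 0;
  for (const doc of pinnedDocs) {
    const cost = estimateTokens(doc.pageContent);
    if (used + cost > maxTokens) break; // budget exhausted; remaining pins are dropped
    selected.push(doc);
    used += cost;
  }
  return selected;
}

Under the old limits.system cap a loop like this would stop after a small system-prompt slice; with promptWindowLimit() it can run until the entire window is spent, which is exactly the behavior the updated comments warn about.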
@@ -89,11 +89,10 @@ async function chatWithWorkspace(
     chatMode,
   });
 
-  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
-  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // See stream.js comment for more information on this implementation.
   await new DocumentManager({
     workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {
@@ -105,9 +105,13 @@ async function streamChatWithWorkspace(
 
   // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
   // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // However we limit the maximum of appended context to 80% of its overall size, mostly because if it expands beyond this
+  // it will undergo prompt compression anyway to make it work. If there is so much pinned that the context here is bigger than
+  // what the model can support - it would get compressed anyway and that really is not the point of pinning. It is really best
+  // suited for high-context models.
   await new DocumentManager({
     workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {
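The new comment block above states the guard in prose: pinned context is held to roughly 80% of the window, because anything larger would be prompt-compressed to fit the model, and compression defeats the verbatim intent of pinning. A hedged sketch of that rule, where the 0.8 ratio is taken from the comment and the helper name is invented for illustration (the diff does not show where this cap lives in the code):

// Sketch of the ~80% ceiling described in the comment; names are assumptions.
const PINNED_CONTEXT_RATIO = 0.8;

function pinnedTokenBudget(promptWindowLimit) {
  // Leave headroom so pinned text is never forced through prompt compression,
  // which would truncate or rewrite content the user pinned to appear verbatim.
  return Math.floor(promptWindowLimit * PINNED_CONTEXT_RATIO);
}

// Hypothetical usage with the connector method introduced by this commit:
// const budget = pinnedTokenBudget(LLMConnector.promptWindowLimit());
// e.g. a 128,000-token window yields a 102,400-token budget for pinned docs,
// while vector-search results continue to supplement the conversation.

As the comment notes, this scheme pays off mainly on high-context models, where 80% of the window is large enough to hold entire pinned documents without compression.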