From 3e088f22b1f87112a3263ee3c231d9937139f59e Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Fri, 5 Jan 2024 09:39:19 -0800 Subject: [PATCH] fix: Patch tiktoken method missing resolves #541 --- server/utils/helpers/chat/index.js | 2 +- server/utils/helpers/tiktoken.js | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/server/utils/helpers/chat/index.js b/server/utils/helpers/chat/index.js index b969201e..ed7eab90 100644 --- a/server/utils/helpers/chat/index.js +++ b/server/utils/helpers/chat/index.js @@ -300,7 +300,7 @@ function cannonball({ // if the delta is the token difference between where our prompt is in size // and where we ideally need to land. const delta = initialInputSize - targetTokenSize; - const tokenChunks = tokenManager.countFromString(input); + const tokenChunks = tokenManager.tokensFromString(input); const middleIdx = Math.floor(tokenChunks.length / 2); // middle truncate the text going left and right of midpoint diff --git a/server/utils/helpers/tiktoken.js b/server/utils/helpers/tiktoken.js index 13475549..c5852892 100644 --- a/server/utils/helpers/tiktoken.js +++ b/server/utils/helpers/tiktoken.js @@ -15,15 +15,22 @@ class TokenManager { } } + // Pass in an empty array of disallowedSpecials to handle all tokens as text and to be tokenized. + // https://github.com/openai/tiktoken/blob/9e79899bc248d5313c7dd73562b5e211d728723d/tiktoken/core.py#L91C20-L91C38 + // Returns number[] + tokensFromString(input = "") { + const tokens = this.encoder.encode(input, undefined, []); + return tokens; + } + bytesFromTokens(tokens = []) { const bytes = this.encoder.decode(tokens); return bytes; } - // Pass in an empty array of disallowedSpecials to handle all tokens as text and to be tokenized. - // https://github.com/openai/tiktoken/blob/9e79899bc248d5313c7dd73562b5e211d728723d/tiktoken/core.py#L91C20-L91C38 + // Returns number countFromString(input = "") { - const tokens = this.encoder.encode(input, undefined, []); + const tokens = this.tokensFromString(input); return tokens.length; }