mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-05 06:20:10 +01:00
Handle special token in TikToken (#528)
* Handle special token in TikToken (resolves #525)
* Remove duplicate method; add clarification comment on implementation
This commit is contained in:
parent
a2a903741d
commit
92da23e963
@ -300,7 +300,7 @@ function cannonball({
|
||||
// The delta is the token difference between where our prompt is in size
|
||||
// and where we ideally need to land.
|
||||
const delta = initialInputSize - targetTokenSize;
|
||||
const tokenChunks = tokenManager.tokensFromString(input);
|
||||
const tokenChunks = tokenManager.countFromString(input);
|
||||
const middleIdx = Math.floor(tokenChunks.length / 2);
|
||||
|
||||
// middle truncate the text going left and right of midpoint
|
||||
|
@ -3,12 +3,11 @@ const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");
|
||||
class TokenManager {
|
||||
constructor(model = "gpt-3.5-turbo") {
|
||||
this.model = model;
|
||||
this.encoderName = this.getEncodingFromModel(model);
|
||||
this.encoderName = this.#getEncodingFromModel(model);
|
||||
this.encoder = getEncoding(this.encoderName);
|
||||
this.buffer = 50;
|
||||
}
|
||||
|
||||
getEncodingFromModel(model) {
|
||||
#getEncodingFromModel(model) {
|
||||
try {
|
||||
return getEncodingNameForModel(model);
|
||||
} catch {
|
||||
@ -16,18 +15,15 @@ class TokenManager {
|
||||
}
|
||||
}
|
||||
|
||||
tokensFromString(input = "") {
|
||||
const tokens = this.encoder.encode(input);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
bytesFromTokens(tokens = []) {
|
||||
const bytes = this.encoder.decode(tokens);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Pass an empty array as the disallowed-special list (third argument to encode) so that special tokens in the input are treated as plain text and tokenized instead of raising an error.
|
||||
// https://github.com/openai/tiktoken/blob/9e79899bc248d5313c7dd73562b5e211d728723d/tiktoken/core.py#L91C20-L91C38
|
||||
countFromString(input = "") {
|
||||
const tokens = this.encoder.encode(input);
|
||||
const tokens = this.encoder.encode(input, undefined, []);
|
||||
return tokens.length;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user