anything-llm/server/utils/helpers/tiktoken.js

const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");

/**
 * Thin wrapper around a js-tiktoken encoder used to tokenize text and to
 * estimate the token cost of plain strings and chat-style message arrays.
 */
class TokenManager {
  /**
   * @param {string} [model="gpt-3.5-turbo"] - Model name used to pick the encoding.
   */
  constructor(model = "gpt-3.5-turbo") {
    this.model = model;
    this.encoderName = this.getEncodingFromModel(model);
    this.encoder = getEncoding(this.encoderName);
    // Not read anywhere inside this class; presumably a token safety margin
    // consumed by callers — TODO confirm against usages.
    this.buffer = 50;
  }

  /**
   * Resolve the encoding name for a model.
   *
   * @param {string} model - Model name to look up.
   * @returns {string} The encoding name, or "cl100k_base" when the lookup throws.
   */
  getEncodingFromModel(model) {
    try {
      return getEncodingNameForModel(model);
    } catch {
      // Deliberate best-effort: any lookup failure falls back to a fixed
      // encoding instead of failing construction.
      return "cl100k_base";
    }
  }

  /**
   * Encode text into tokens.
   *
   * @param {string} [input=""] - Text to encode; null/undefined is treated as "".
   * @returns {number[]} Whatever the encoder returns for the text (token ids).
   */
  tokensFromString(input = "") {
    // The default parameter only covers `undefined`; `null` must be normalized
    // here or the encoder is handed a non-string.
    const text = input == null ? "" : input;
    // An empty `disallowedSpecial` list makes the encoder treat special-token
    // literals (e.g. "<|endoftext|>") in user text as ordinary text instead of
    // rejecting the whole input.
    return this.encoder.encode(text, undefined, []);
  }

  /**
   * Decode tokens back into text.
   *
   * @param {number[]} [tokens=[]] - Tokens previously produced by the encoder.
   * @returns {string} Whatever the encoder's `decode` returns for the tokens.
   */
  bytesFromTokens(tokens = []) {
    return this.encoder.decode(tokens);
  }

  /**
   * Count the tokens in a string.
   *
   * @param {string} [input=""] - Text to measure; null/undefined counts as 0.
   * @returns {number} Number of tokens the encoder produces for the text.
   */
  countFromString(input = "") {
    return this.tokensFromString(input).length;
  }

  /**
   * Token count for a string, or an estimated token count for an array of
   * chat messages (objects carrying a `content` string).
   *
   * @param {string|Array<{content?: (string|null)}>} input
   * @returns {number} Exact count for strings; an estimate for message arrays.
   * @throws {Error} When `input` is neither a string nor an array.
   */
  statsFrom(input) {
    if (typeof input === "string") return this.countFromString(input);

    // Message arrays can only be estimated. See item 6 of
    // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    // Repeated testing with the static values from that notebook was always
    // 2 tokens short, so as of Nov 1, 2023 the trailing additive factor is 5
    // rather than the notebook's 3.
    if (Array.isArray(input)) {
      const perMessageFactorTokens = input.length * 3;
      const tokensFromContent = input.reduce(
        (total, message) =>
          // Messages without textual content (missing or null `content`, or a
          // null entry) contribute no content tokens.
          total + this.countFromString(message == null ? "" : message.content),
        0
      );
      const diffCoefficient = 5;
      return perMessageFactorTokens + tokensFromContent + diffCoefficient;
    }

    throw new Error("Not a supported tokenized format.");
  }
}

// Public surface of this module: only the TokenManager class.
module.exports = { TokenManager };
Infinite prompt input and compression implementation (#332) * WIP on continuous prompt window summary * wip * Move chat out of VDB simplify chat interface normalize LLM model interface have compression abstraction Cleanup compressor TODO: Anthropic stuff * Implement compression for Anthropic Fix lancedb sources * cleanup vectorDBs and check that lance, chroma, and pinecone are returning valid metadata sources * Resolve Weaviate citation sources not working with schema * comment cleanup 2023-11-06 22:13:53 +01:00			`const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");`

			`class TokenManager {`
			`constructor(model = "gpt-3.5-turbo") {`
			`this.model = model;`
			`this.encoderName = this.getEncodingFromModel(model);`
			`this.encoder = getEncoding(this.encoderName);`
			`this.buffer = 50;`
			`}`

			`getEncodingFromModel(model) {`
			`try {`
			`return getEncodingNameForModel(model);`
			`} catch {`
			`return "cl100k_base";`
			`}`
			`}`

			`tokensFromString(input = "") {`
			`const tokens = this.encoder.encode(input);`
			`return tokens;`
			`}`

			`bytesFromTokens(tokens = []) {`
			`const bytes = this.encoder.decode(tokens);`
			`return bytes;`
			`}`

			`countFromString(input = "") {`
			`const tokens = this.encoder.encode(input);`
			`return tokens.length;`
			`}`

			`statsFrom(input) {`
			`if (typeof input === "string") return this.countFromString(input);`

			`// What is going on here?`
			`// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb Item 6.`
			`// The only option is to estimate. From repeated testing using the static values in the code we are always 2 off,`
			`// which means as of Nov 1, 2023 the additional factor on ln: 476 changed from 3 to 5.`
			`if (Array.isArray(input)) {`
			`const perMessageFactorTokens = input.length * 3;`
			`const tokensFromContent = input.reduce(`
			`(a, b) => a + this.countFromString(b.content),`
			`0`
			`);`
			`const diffCoefficient = 5;`
			`return perMessageFactorTokens + tokensFromContent + diffCoefficient;`
			`}`

			`throw new Error("Not a supported tokenized format.");`
			`}`
			`}`

			`module.exports = {`
			`TokenManager,`
			`};`