import { processTextWithTokenLimit, TokenCountFn } from './text';
import Tokenizer, { countTokens } from './tokenizer';

// Mock the logger so debug/warn/error output from the code under test stays
// out of the test run.
jest.mock('@librechat/data-schemas', () => ({
  logger: {
    debug: jest.fn(),
    warn: jest.fn(),
    error: jest.fn(),
  },
}));

/**
 * OLD implementation of processTextWithTokenLimit, which binary-searched the
 * character offset and called tokenCountFn on every probe. Kept here so the
 * tests below can compare its tokenCountFn call counts and timing against the
 * current implementation.
 */
async function processTextWithTokenLimitOLD({
  text,
  tokenLimit,
  tokenCountFn,
}: {
  text: string;
  tokenLimit: number;
  tokenCountFn: TokenCountFn;
}): Promise<{ text: string; tokenCount: number; wasTruncated: boolean }> {
  const originalTokenCount = await tokenCountFn(text);

  if (originalTokenCount <= tokenLimit) {
    return {
      text,
      tokenCount: originalTokenCount,
      wasTruncated: false,
    };
  }

  let low = 0;
  let high = text.length;
  let bestText = '';

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);
    const truncatedText = text.substring(0, mid);
    const tokenCount = await tokenCountFn(truncatedText);

    if (tokenCount <= tokenLimit) {
      bestText = truncatedText;
      low = mid + 1;
    } else {
      high = mid - 1;
    }
  }

  const finalTokenCount = await tokenCountFn(bestText);

  return {
    text: bestText,
    tokenCount: finalTokenCount,
    wasTruncated: true,
  };
}
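
/*
 * Cost model for the comparisons below: the OLD version performs one initial
 * count, ~log2(text.length) counts while bisecting, and one final recount,
 * i.e. roughly 20 tokenCountFn calls for the 400k-500k character inputs used
 * in these tests. The current implementation is expected to stay at or under
 * 7 calls. A strategy consistent with that bound (an assumption for
 * illustration only; the real logic lives in ./text) is proportional
 * estimate-and-verify:
 *
 *   const ratio = text.length / originalTokenCount; // observed chars/token
 *   let cut = Math.floor(tokenLimit * ratio);       // jump to a first guess
 *   // count the prefix text.slice(0, cut), rescale `cut` by
 *   // tokenLimit / measuredCount, and re-check; each round costs one
 *   // tokenCountFn call and typically converges within 2-3 rounds.
 */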

/**
 * Token counter backed by the real tiktoken encoder (cl100k_base), with a
 * call counter so tests can measure how often it is invoked.
 */
const createRealTokenCounter = () => {
  let callCount = 0;
  const tokenCountFn = (text: string): number => {
    callCount++;
    return Tokenizer.getTokenCount(text, 'cl100k_base');
  };
  return {
    tokenCountFn,
    getCallCount: () => callCount,
    resetCallCount: () => {
      callCount = 0;
    },
  };
};

/**
 * Token counter backed by the async countTokens helper, with a call counter.
 */
const createCountTokensCounter = () => {
  let callCount = 0;
  const tokenCountFn = async (text: string): Promise<number> => {
    callCount++;
    return countTokens(text);
  };
  return {
    tokenCountFn,
    getCallCount: () => callCount,
    resetCallCount: () => {
      callCount = 0;
    },
  };
};

describe('processTextWithTokenLimit', () => {
  /**
   * Mock token counter using the common ~4 characters-per-token heuristic,
   * with a call counter so tests can assert exactly how many times it is
   * invoked.
   */
  const createMockTokenCounter = () => {
    let callCount = 0;
    const tokenCountFn = (text: string): number => {
      callCount++;
      return Math.ceil(text.length / 4);
    };
    return {
      tokenCountFn,
      getCallCount: () => callCount,
      resetCallCount: () => {
        callCount = 0;
      },
    };
  };
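
  // With this heuristic the token count is strictly ceil(length / 4), so for
  // example 8000 chars -> 2000 tokens, and a 1000-token limit forces the
  // result down to at most 4000 chars. This proportionality is what makes the
  // call-count and "close to the limit" assertions below deterministic.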

  /** Creates a string of exactly `charLength` repeated characters. */
  const createTextOfLength = (charLength: number): string => {
    return 'a'.repeat(charLength);
  };

  /** Builds realistic English text of roughly `approximateTokens` tokens. */
  const createRealisticText = (approximateTokens: number): string => {
    const words = [
      'the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog',
      'lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing',
      'elit', 'sed', 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore',
      'et', 'dolore', 'magna', 'aliqua', 'enim', 'ad', 'minim', 'veniam',
      'authentication', 'implementation', 'configuration', 'documentation',
    ];
    const result: string[] = [];
    for (let i = 0; i < approximateTokens; i++) {
      result.push(words[i % words.length]);
    }
    return result.join(' ');
  };
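
  // Under cl100k_base, common space-prefixed English words encode as roughly
  // one token each, so joining N words yields approximately N tokens (longer
  // words such as 'authentication' may split into a few tokens, which is
  // close enough for these tests).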

  describe('tokenCountFn flexibility (sync and async)', () => {
    // TokenCountFn accepts both sync and async counters; the implementation
    // awaits every call, so both shapes behave identically.
    it('should work with synchronous tokenCountFn', async () => {
      const syncTokenCountFn = (text: string): number => Math.ceil(text.length / 4);
      const text = 'Hello, world! This is a test message.';
      const tokenLimit = 5;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: syncTokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
    });

    it('should work with asynchronous tokenCountFn', async () => {
      const asyncTokenCountFn = async (text: string): Promise<number> => {
        await new Promise((resolve) => setTimeout(resolve, 1));
        return Math.ceil(text.length / 4);
      };
      const text = 'Hello, world! This is a test message.';
      const tokenLimit = 5;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: asyncTokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
    });

    it('should produce equivalent results with sync and async tokenCountFn', async () => {
      const syncTokenCountFn = (text: string): number => Math.ceil(text.length / 4);
      const asyncTokenCountFn = async (text: string): Promise<number> =>
        Math.ceil(text.length / 4);
      const text = 'a'.repeat(8000);
      const tokenLimit = 1000;

      const syncResult = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: syncTokenCountFn,
      });

      const asyncResult = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: asyncTokenCountFn,
      });

      expect(syncResult.tokenCount).toBe(asyncResult.tokenCount);
      expect(syncResult.wasTruncated).toBe(asyncResult.wasTruncated);
      expect(syncResult.text.length).toBe(asyncResult.text.length);
    });
  });

  describe('when text is under the token limit', () => {
    it('should return original text unchanged', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = 'Hello, world!';
      const tokenLimit = 100;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.text).toBe(text);
      expect(result.wasTruncated).toBe(false);
    });

    it('should return correct token count', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = 'Hello, world!';
      const tokenLimit = 100;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.tokenCount).toBe(Math.ceil(text.length / 4));
    });

    it('should only call tokenCountFn once when under limit', async () => {
      const { tokenCountFn, getCallCount } = createMockTokenCounter();
      const text = 'Hello, world!';
      const tokenLimit = 100;

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(getCallCount()).toBe(1);
    });
  });

  describe('when text is exactly at the token limit', () => {
    it('should return original text unchanged', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = createTextOfLength(400); // exactly 100 tokens under the mock
      const tokenLimit = 100;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.text).toBe(text);
      expect(result.wasTruncated).toBe(false);
      expect(result.tokenCount).toBe(tokenLimit);
    });
  });

  describe('when text exceeds the token limit', () => {
    it('should truncate text to fit within limit', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = createTextOfLength(8000);
      const tokenLimit = 1000;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(result.text.length).toBeLessThan(text.length);
    });

    it('should truncate text to be close to but not exceed the limit', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = createTextOfLength(8000);
      const tokenLimit = 1000;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(result.tokenCount).toBeGreaterThan(tokenLimit * 0.9);
    });
  });

  describe('efficiency - tokenCountFn call count', () => {
    it('should call tokenCountFn at most 7 times for large text (vs ~17 for binary search)', async () => {
      const { tokenCountFn, getCallCount } = createMockTokenCounter();
      const text = createTextOfLength(400000);
      const tokenLimit = 50000;

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(getCallCount()).toBeLessThanOrEqual(7);
    });

    it('should typically call tokenCountFn only 2-3 times for standard truncation', async () => {
      const { tokenCountFn, getCallCount } = createMockTokenCounter();
      const text = createTextOfLength(40000);
      const tokenLimit = 5000;

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(getCallCount()).toBeLessThanOrEqual(3);
    });

    it('should call tokenCountFn only once when text is under limit', async () => {
      const { tokenCountFn, getCallCount } = createMockTokenCounter();
      const text = createTextOfLength(1000);
      const tokenLimit = 10000;

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(getCallCount()).toBe(1);
    });

    it('should handle very large text (100k+ tokens) efficiently', async () => {
      const { tokenCountFn, getCallCount } = createMockTokenCounter();
      const text = createTextOfLength(500000);
      const tokenLimit = 100000;

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(getCallCount()).toBeLessThanOrEqual(7);
    });
  });

  describe('edge cases', () => {
    it('should handle empty text', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = '';
      const tokenLimit = 100;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.text).toBe('');
      expect(result.tokenCount).toBe(0);
      expect(result.wasTruncated).toBe(false);
    });

    it('should handle token limit of 1', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = createTextOfLength(1000);
      const tokenLimit = 1;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
    });

    it('should handle text that is just slightly over the limit', async () => {
      const { tokenCountFn } = createMockTokenCounter();
      const text = createTextOfLength(404); // 101 tokens under the mock
      const tokenLimit = 100;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
    });
  });

  describe('correctness with variable token density', () => {
    it('should handle text with varying token density', async () => {
      // Charges one extra token per whitespace run on top of the ~4 chars per
      // token heuristic, so token density varies with how word-y the text is.
      const variableDensityTokenCounter = (text: string): number => {
        const whitespaceRuns = (text.match(/\s+/g) || []).length;
        return Math.ceil(text.length / 4) + whitespaceRuns;
      };

      const text = 'This is a test with many short words and some longer concatenated words too';
      const tokenLimit = 10;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: variableDensityTokenCounter,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
    });
  });

  describe('direct comparison with OLD binary search implementation', () => {
    it('should produce equivalent results to the old implementation', async () => {
      const oldCounter = createMockTokenCounter();
      const newCounter = createMockTokenCounter();
      const text = createTextOfLength(8000);
      const tokenLimit = 1000;

      const oldResult = await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      const newResult = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      expect(newResult.wasTruncated).toBe(oldResult.wasTruncated);
      expect(newResult.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(oldResult.tokenCount).toBeLessThanOrEqual(tokenLimit);
    });

    it('should use significantly fewer tokenCountFn calls than old implementation (400k chars)', async () => {
      const oldCounter = createMockTokenCounter();
      const newCounter = createMockTokenCounter();
      const text = createTextOfLength(400000);
      const tokenLimit = 50000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(
        `[400k chars] OLD implementation: ${oldCalls} calls, NEW implementation: ${newCalls} calls`,
      );
      console.log(`[400k chars] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);

      expect(newCalls).toBeLessThan(oldCalls);
      expect(newCalls).toBeLessThanOrEqual(7);
    });

    it('should use significantly fewer tokenCountFn calls than old implementation (500k chars, 100k token limit)', async () => {
      const oldCounter = createMockTokenCounter();
      const newCounter = createMockTokenCounter();
      const text = createTextOfLength(500000);
      const tokenLimit = 100000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(
        `[500k chars] OLD implementation: ${oldCalls} calls, NEW implementation: ${newCalls} calls`,
      );
      console.log(`[500k chars] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);

      expect(newCalls).toBeLessThan(oldCalls);
    });

    it('should achieve at least 70% reduction in tokenCountFn calls', async () => {
      const oldCounter = createMockTokenCounter();
      const newCounter = createMockTokenCounter();
      const text = createTextOfLength(500000);
      const tokenLimit = 100000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();
      const reduction = 1 - newCalls / oldCalls;

      console.log(
        `Efficiency improvement: ${(reduction * 100).toFixed(1)}% fewer tokenCountFn calls`,
      );

      expect(reduction).toBeGreaterThanOrEqual(0.7);
    });

    it('should simulate the reported scenario (122k tokens, 100k limit)', async () => {
      const oldCounter = createMockTokenCounter();
      const newCounter = createMockTokenCounter();
      const text = createTextOfLength(489564); // ~122k tokens under the mock
      const tokenLimit = 100000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(`[User reported scenario: ~122k tokens]`);
      console.log(`OLD implementation: ${oldCalls} tokenCountFn calls`);
      console.log(`NEW implementation: ${newCalls} tokenCountFn calls`);
      console.log(`Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);

      expect(newCalls).toBeLessThan(oldCalls);
      expect(newCalls).toBeLessThanOrEqual(7);
    });
  });

  describe('direct comparison with REAL tiktoken tokenizer', () => {
    beforeEach(() => {
      Tokenizer.freeAndResetAllEncoders();
    });

    it('should produce valid truncation with real tokenizer', async () => {
      const counter = createRealTokenCounter();
      const text = createRealisticText(5000);
      const tokenLimit = 1000;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: counter.tokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(result.text.length).toBeLessThan(text.length);
    });

    it('should use fewer tiktoken calls than old implementation (realistic text)', async () => {
      const oldCounter = createRealTokenCounter();
      const newCounter = createRealTokenCounter();
      const text = createRealisticText(15000);
      const tokenLimit = 5000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      // Reset cached encoders so both implementations start from a cold cache
      Tokenizer.freeAndResetAllEncoders();

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(`[Real tiktoken ~15k tokens] OLD: ${oldCalls} calls, NEW: ${newCalls} calls`);
      console.log(`[Real tiktoken] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);

      expect(newCalls).toBeLessThan(oldCalls);
    });

    it('should handle the reported user scenario with real tokenizer (~120k tokens)', async () => {
      const oldCounter = createRealTokenCounter();
      const newCounter = createRealTokenCounter();
      const text = createRealisticText(120000);
      const tokenLimit = 100000;

      const startOld = performance.now();
      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });
      const timeOld = performance.now() - startOld;

      // Reset cached encoders so both implementations start from a cold cache
      Tokenizer.freeAndResetAllEncoders();

      const startNew = performance.now();
      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });
      const timeNew = performance.now() - startNew;

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(`\n[REAL TIKTOKEN - User reported scenario: ~120k tokens]`);
      console.log(`OLD implementation: ${oldCalls} tiktoken calls, ${timeOld.toFixed(0)}ms`);
      console.log(`NEW implementation: ${newCalls} tiktoken calls, ${timeNew.toFixed(0)}ms`);
      console.log(`Call reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
      console.log(`Time reduction: ${((1 - timeNew / timeOld) * 100).toFixed(1)}%`);
      console.log(
        `Result: truncated=${result.wasTruncated}, tokens=${result.tokenCount}/${tokenLimit}\n`,
      );

      expect(newCalls).toBeLessThan(oldCalls);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(newCalls).toBeLessThanOrEqual(7);
    });

    it('should achieve at least 70% reduction with real tokenizer', async () => {
      const oldCounter = createRealTokenCounter();
      const newCounter = createRealTokenCounter();
      const text = createRealisticText(50000);
      const tokenLimit = 10000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      // Reset cached encoders so both implementations start from a cold cache
      Tokenizer.freeAndResetAllEncoders();

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();
      const reduction = 1 - newCalls / oldCalls;

      console.log(
        `[Real tiktoken 50k tokens] OLD: ${oldCalls}, NEW: ${newCalls}, Reduction: ${(reduction * 100).toFixed(1)}%`,
      );

      expect(reduction).toBeGreaterThanOrEqual(0.7);
    });
  });

  describe('using countTokens async function from @librechat/api', () => {
    beforeEach(() => {
      Tokenizer.freeAndResetAllEncoders();
    });

    it('countTokens should return correct token count', async () => {
      const text = 'Hello, world!';
      const count = await countTokens(text);

      expect(count).toBeGreaterThan(0);
      expect(typeof count).toBe('number');
    });

    it('countTokens should handle empty string', async () => {
      const count = await countTokens('');
      expect(count).toBe(0);
    });

    it('should work with processTextWithTokenLimit using countTokens', async () => {
      const counter = createCountTokensCounter();
      const text = createRealisticText(5000);
      const tokenLimit = 1000;

      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: counter.tokenCountFn,
      });

      expect(result.wasTruncated).toBe(true);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(result.text.length).toBeLessThan(text.length);
    });

    it('should use fewer countTokens calls than old implementation', async () => {
      const oldCounter = createCountTokensCounter();
      const newCounter = createCountTokensCounter();
      const text = createRealisticText(15000);
      const tokenLimit = 5000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      Tokenizer.freeAndResetAllEncoders();

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(`[countTokens ~15k tokens] OLD: ${oldCalls} calls, NEW: ${newCalls} calls`);
      console.log(`[countTokens] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);

      expect(newCalls).toBeLessThan(oldCalls);
    });

    it('should handle user reported scenario with countTokens (~120k tokens)', async () => {
      const oldCounter = createCountTokensCounter();
      const newCounter = createCountTokensCounter();
      const text = createRealisticText(120000);
      const tokenLimit = 100000;

      const startOld = performance.now();
      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });
      const timeOld = performance.now() - startOld;

      Tokenizer.freeAndResetAllEncoders();

      const startNew = performance.now();
      const result = await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });
      const timeNew = performance.now() - startNew;

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();

      console.log(`\n[countTokens - User reported scenario: ~120k tokens]`);
      console.log(`OLD implementation: ${oldCalls} countTokens calls, ${timeOld.toFixed(0)}ms`);
      console.log(`NEW implementation: ${newCalls} countTokens calls, ${timeNew.toFixed(0)}ms`);
      console.log(`Call reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
      console.log(`Time reduction: ${((1 - timeNew / timeOld) * 100).toFixed(1)}%`);
      console.log(
        `Result: truncated=${result.wasTruncated}, tokens=${result.tokenCount}/${tokenLimit}\n`,
      );

      expect(newCalls).toBeLessThan(oldCalls);
      expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
      expect(newCalls).toBeLessThanOrEqual(7);
    });

    it('should achieve at least 70% reduction with countTokens', async () => {
      const oldCounter = createCountTokensCounter();
      const newCounter = createCountTokensCounter();
      const text = createRealisticText(50000);
      const tokenLimit = 10000;

      await processTextWithTokenLimitOLD({
        text,
        tokenLimit,
        tokenCountFn: oldCounter.tokenCountFn,
      });

      Tokenizer.freeAndResetAllEncoders();

      await processTextWithTokenLimit({
        text,
        tokenLimit,
        tokenCountFn: newCounter.tokenCountFn,
      });

      const oldCalls = oldCounter.getCallCount();
      const newCalls = newCounter.getCallCount();
      const reduction = 1 - newCalls / oldCalls;

      console.log(
        `[countTokens 50k tokens] OLD: ${oldCalls}, NEW: ${newCalls}, Reduction: ${(reduction * 100).toFixed(1)}%`,
      );

      expect(reduction).toBeGreaterThanOrEqual(0.7);
    });
  });
});