Update file tokenizer.h
Browse files
- c_tokenizer/tokenizer.h +5 -3
c_tokenizer/tokenizer.h
CHANGED
|
@@ -35,17 +35,19 @@ struct Tokenizer {
|
|
| 35 |
|
| 36 |
static uint16_t TokenizerGetVocabSize() { return (_binary_tokenizer_bin_end - _binary_tokenizer_bin_start) / 3; }
|
| 37 |
|
|
|
|
| 38 |
#include <stdlib.h>
|
| 39 |
#include <unistd.h>
|
| 40 |
#include <fcntl.h>
|
| 41 |
#include <stdio.h>
|
|
|
|
| 42 |
|
| 43 |
static uint16_t TokenizerFind(Tokenizer *tokenizer, uint8_t byte, uint16_t prev) {
|
| 44 |
|
| 45 |
-
for (int i = 0; i < tokenizer->get_vocab_size(); ++i)
|
| 46 |
-
printf("token %d: (%c, %d)\n", i, tokenizer->vocab[i].byte, tokenizer->vocab[i].prev);
|
| 47 |
|
| 48 |
-
exit(0);
|
| 49 |
|
| 50 |
for (uint16_t i = prev; i < tokenizer->get_vocab_size(); ++i)
|
| 51 |
if (tokenizer->vocab[i].byte == byte && tokenizer->vocab[i].prev == prev)
|
|
|
|
| 35 |
|
| 36 |
static uint16_t TokenizerGetVocabSize() { return (_binary_tokenizer_bin_end - _binary_tokenizer_bin_start) / 3; }
|
| 37 |
|
| 38 |
+
/*
|
| 39 |
#include <stdlib.h>
|
| 40 |
#include <unistd.h>
|
| 41 |
#include <fcntl.h>
|
| 42 |
#include <stdio.h>
|
| 43 |
+
*/
|
| 44 |
|
| 45 |
static uint16_t TokenizerFind(Tokenizer *tokenizer, uint8_t byte, uint16_t prev) {
|
| 46 |
|
| 47 |
+
//for (int i = 0; i < tokenizer->get_vocab_size(); ++i)
|
| 48 |
+
// printf("token %d: (%c, %d)\n", i, tokenizer->vocab[i].byte, tokenizer->vocab[i].prev);
|
| 49 |
|
| 50 |
+
//exit(0);
|
| 51 |
|
| 52 |
for (uint16_t i = prev; i < tokenizer->get_vocab_size(); ++i)
|
| 53 |
if (tokenizer->vocab[i].byte == byte && tokenizer->vocab[i].prev == prev)
|