Update file trainer.cli.py
Browse files
- trainer.cli.py +1 -1
trainer.cli.py
CHANGED
|
@@ -30,7 +30,7 @@ if __name__ == '__main__':
|
|
| 30 |
dataset = Dataset(config.dataset)
|
| 31 |
|
| 32 |
tokenizer = Tokenizer()
|
| 33 |
-
|
| 34 |
|
| 35 |
ids = tokenizer.c_encode(dataset.text)
|
| 36 |
|
|
|
|
| 30 |
dataset = Dataset(config.dataset)
|
| 31 |
|
| 32 |
tokenizer = Tokenizer()
|
| 33 |
+
tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
|
| 34 |
|
| 35 |
ids = tokenizer.c_encode(dataset.text)
|
| 36 |
|