flpelerin committed on
Commit
80437f3
·
1 Parent(s): 1907275

Update 3 files

Browse files

- /trainer.py
- /trainer.cli.py
- /tokenizer.py

Files changed (3) hide show
  1. tokenizer.py +1 -1
  2. trainer.cli.py +5 -1
  3. trainer.py +4 -1
tokenizer.py CHANGED
@@ -145,4 +145,4 @@ class Tokenizer:
145
 
146
 
147
  def c_encode(self, text): #TODO: Implement
148
- return []
 
145
 
146
 
147
  def c_encode(self, text): #TODO: Implement
148
+ return [1, 2, 3, 4]
trainer.cli.py CHANGED
@@ -31,8 +31,12 @@ if __name__ == '__main__':
31
  tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
32
  ids = tokenizer.c_encode(dataset.text)
33
 
 
34
  dataset += ids
35
- dataset.batch(ids)
 
 
 
36
 
37
  trainer = Trainer(config)
38
  trainer.train(dataset)
 
31
  tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
32
  ids = tokenizer.c_encode(dataset.text)
33
 
34
+
35
  dataset += ids
36
+ #dataset.batch(ids)
37
+
38
+ print(f"dataset ids: {dataset.ids}")
39
+
40
 
41
  trainer = Trainer(config)
42
  trainer.train(dataset)
trainer.py CHANGED
@@ -11,4 +11,7 @@ class Trainer:
11
  #self.wandb = Wandb(config.wandb)
12
 
13
  self.model = Model(config.model)
14
-
 
 
 
 
11
  #self.wandb = Wandb(config.wandb)
12
 
13
  self.model = Model(config.model)
14
+
15
+
16
+ def train(self, dataset): # TODO: Implement
17
+ pass