Transformers
ZhiyuanChen committed on
Commit
69702b4
·
verified ·
1 Parent(s): 4d6545c

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +3 -0
  2. vocab.txt +8 -8
tokenizer_config.json CHANGED
@@ -55,10 +55,13 @@
55
  "bos_token": "<cls>",
56
  "clean_up_tokenization_spaces": true,
57
  "cls_token": "<cls>",
 
58
  "eos_token": "<eos>",
59
  "mask_token": "<mask>",
60
  "model_max_length": 1000000000000000019884624838656,
 
61
  "pad_token": "<pad>",
 
62
  "sep_token": "<eos>",
63
  "tokenizer_class": "DnaTokenizer",
64
  "unk_token": "<unk>"
 
55
  "bos_token": "<cls>",
56
  "clean_up_tokenization_spaces": true,
57
  "cls_token": "<cls>",
58
+ "codon": false,
59
  "eos_token": "<eos>",
60
  "mask_token": "<mask>",
61
  "model_max_length": 1000000000000000019884624838656,
62
+ "nmers": 1,
63
  "pad_token": "<pad>",
64
+ "replace_U_with_T": true,
65
  "sep_token": "<eos>",
66
  "tokenizer_class": "DnaTokenizer",
67
  "unk_token": "<unk>"
vocab.txt CHANGED
@@ -9,17 +9,17 @@ C
9
  G
10
  T
11
  N
12
- X
13
- V
14
- H
15
- D
16
- B
17
- M
18
  R
19
- W
20
- S
21
  Y
 
 
22
  K
 
 
 
 
 
23
  .
 
24
  *
25
  -
 
9
  G
10
  T
11
  N
 
 
 
 
 
 
12
  R
 
 
13
  Y
14
+ S
15
+ W
16
  K
17
+ M
18
+ B
19
+ D
20
+ H
21
+ V
22
  .
23
+ X
24
  *
25
  -