Girinath11 committed on
Commit
993af66
·
verified ·
1 Parent(s): c2b8ae0

Upload 4 files

Browse files
Files changed (4) hide show
  1. merges.txt +0 -0
  2. token_frequencies.pkl +3 -0
  3. tokenizer_config.json +107 -0
  4. vocab.json +0 -0
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
token_frequencies.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7ae0615c05920ca290b769650eda0552e116598548e4cfb8398eab2452c1e7
3
+ size 371919
tokenizer_config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 32000,
3
+ "min_freq": 2,
4
+ "special_tokens": {
5
+ "<pad>": 0,
6
+ "<unk>": 1,
7
+ "<bos>": 2,
8
+ "<eos>": 3,
9
+ "<system>": 4,
10
+ "<user>": 5,
11
+ "<assistant>": 6,
12
+ "<|endoftext|>": 7,
13
+ "<|newline|>": 8,
14
+ "<|tab|>": 9,
15
+ "<|code|>": 10,
16
+ "<|/code|>": 11,
17
+ "<|math|>": 12,
18
+ "<|/math|>": 13
19
+ },
20
+ "technical_terms": [
21
+ "protocol",
22
+ "backpropagation",
23
+ "object",
24
+ "async",
25
+ "validation",
26
+ "test",
27
+ "await",
28
+ "boolean",
29
+ "hypothesis",
30
+ "f1score",
31
+ "string",
32
+ "html",
33
+ "polynomial",
34
+ "neuron",
35
+ "class",
36
+ "loss",
37
+ "import",
38
+ "promise",
39
+ "graphql",
40
+ "export",
41
+ "authorization",
42
+ "proof",
43
+ "theorem",
44
+ "variable",
45
+ "authentication",
46
+ "https",
47
+ "regression",
48
+ "algorithm",
49
+ "security",
50
+ "architecture",
51
+ "function",
52
+ "accuracy",
53
+ "statistics",
54
+ "precision",
55
+ "css",
56
+ "weight",
57
+ "python",
58
+ "sql",
59
+ "attention",
60
+ "float",
61
+ "encryption",
62
+ "docker",
63
+ "gradient",
64
+ "datatype",
65
+ "transformer",
66
+ "java",
67
+ "formula",
68
+ "equation",
69
+ "network",
70
+ "vector",
71
+ "client",
72
+ "callback",
73
+ "integer",
74
+ "http",
75
+ "database",
76
+ "layer",
77
+ "javascript",
78
+ "bias",
79
+ "cpp",
80
+ "matrix",
81
+ "embedding",
82
+ "microservice",
83
+ "rest",
84
+ "derivative",
85
+ "recall",
86
+ "tokenization",
87
+ "xml",
88
+ "scalability",
89
+ "kubernetes",
90
+ "model",
91
+ "return",
92
+ "neural",
93
+ "parameter",
94
+ "server",
95
+ "probability",
96
+ "deployment",
97
+ "array",
98
+ "api",
99
+ "integral",
100
+ "training",
101
+ "forward",
102
+ "correlation",
103
+ "method",
104
+ "json",
105
+ "performance"
106
+ ]
107
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff