Xenova HF Staff committed on
Commit
ee0d9cd
·
verified ·
1 Parent(s): 0e59ae5

Upload optimized ONNX model (#1)

Browse files

- Upload optimized ONNX model (b69a4e7576e006f8b5af0fa15e8187c84f21af86)
- Delete onnx/model_bnb4.onnx (8940cfffaba4dfd3e4022f5a180467da2ae7bc76)
- Delete onnx/model_int8.onnx (5ff91e5ccacb0daddd929c849474b4a903bc6412)
- Delete onnx/model_uint8.onnx (917277aa018cd670cc7d40ef37a3c7ba62d74875)

.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
37
+ onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
38
+ onnx/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
39
+ onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
40
+ onnx/model_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -1,12 +1,11 @@
1
  {
2
- "_attn_implementation_autoset": true,
3
- "_name_or_path": "PleIAs/Monad",
4
  "architectures": [
5
  "LlamaForCausalLM"
6
  ],
7
  "attention_bias": false,
8
  "attention_dropout": 0.0,
9
  "bos_token_id": 1,
 
10
  "eos_token_id": 2,
11
  "head_dim": 64,
12
  "hidden_act": "silu",
@@ -21,11 +20,26 @@
21
  "num_key_value_heads": 4,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
- "rope_scaling": null,
 
 
 
25
  "rope_theta": 10000,
26
  "tie_word_embeddings": true,
27
- "torch_dtype": "float32",
28
- "transformers_version": "4.49.0",
29
  "use_cache": true,
30
- "vocab_size": 8192
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
 
 
2
  "architectures": [
3
  "LlamaForCausalLM"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
8
+ "dtype": "bfloat16",
9
  "eos_token_id": 2,
10
  "head_dim": 64,
11
  "hidden_act": "silu",
 
20
  "num_key_value_heads": 4,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
+ "rope_parameters": {
24
+ "rope_theta": 10000,
25
+ "rope_type": "default"
26
+ },
27
  "rope_theta": 10000,
28
  "tie_word_embeddings": true,
29
+ "transformers_version": "5.0.0.dev0",
 
30
  "use_cache": true,
31
+ "vocab_size": 8192,
32
+ "transformers.js_config": {
33
+ "use_external_data_format": {
34
+ "model.onnx": 1,
35
+ "model_fp16.onnx": 1,
36
+ "model_quantized.onnx": 1,
37
+ "model_q4.onnx": 1,
38
+ "model_q4f16.onnx": 1
39
+ },
40
+ "kv_cache_dtype": {
41
+ "q4f16": "float16",
42
+ "fp16": "float16"
43
+ }
44
+ }
45
+ }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.49.0"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "transformers_version": "5.0.0.dev0"
6
  }
onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8243735cd3ca8f725f87aa5dee6142bb077a8a1db3a1f64087dab8554bf6e8c4
3
- size 227902592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6c06b0812fdf586e0cbdc3a8760bf7b91fd1c1e5aebebbbcdf21f066254a88
3
+ size 421087
onnx/model.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4264517ce25a2d32f4231bccf46c597e7822e11dae7f4d6b30d7c1c547ee61d0
3
+ size 227148800
onnx/model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b9402c399e2d1be8de2249c30d6b6c11123cb05248367f9b64f371bcb4f1d38
3
- size 114619221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0059180a76a87e83130d30081ee3711d276128d19409f5ec9bffddb34e577e74
3
+ size 481133
onnx/model_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2c742a6c8e7c8d87166f069ffa7af33a61741cb184fe8936501c98b3330578
3
+ size 113508352
onnx/model_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb6b89b38a81c43dd15860ea9b17760294e90ab7daac2771ef51f19f6cb8753e
3
- size 43944659
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a13355cecc75fde2ad45a842c3ecc1040a83408016d09d4695f8e5ea72608e6b
3
+ size 584663
onnx/{model_bnb4.onnx → model_q4.onnx_data} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bbb4bc65757e51e06f7831a75e00c8132e29f271de994262be19200869ebb85
3
- size 40539987
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a042e3c6c88759b3db48a62bd43c585c95b3fcc3bb8233aebecc59e861afe3
3
+ size 43123712
onnx/model_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67000f2c517b89ed22ad425810fda68940962bb8ade11a26dd14d1de74e9516f
3
- size 36304936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820851cf9f94f7393d03cf9cbc983de743283e1fbf95b00aeb3df59862e291fb
3
+ size 645593
onnx/{model_int8.onnx → model_q4f16.onnx_data} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c27ae76f508e5aeb8b1954446831b9e7bde5d2da5cbf027cbb674bcdb2f0bd
3
- size 58526168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1650aa9ce902ddfdaf97f33cb5281eb92770e18e8049e4fd14ff6746e5f9ff1d
3
+ size 35127296
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c27ae76f508e5aeb8b1954446831b9e7bde5d2da5cbf027cbb674bcdb2f0bd
3
- size 58526168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497f7bf19a0b35603903927ffdc51bac927ca2954e110c6f5966d7485185c60e
3
+ size 1911967
onnx/{model_uint8.onnx → model_quantized.onnx_data} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fec7db87faa8668021649d4899249b32ea6525c734ceda71a8f1ed6635b47737
3
- size 58526397
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde7ba8af8ea6201ba6d1d9de47f7843e73710406b47829831e8a35349353ca6
3
+ size 57279488
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -36,5 +36,5 @@
36
  "clean_up_tokenization_spaces": false,
37
  "extra_special_tokens": {},
38
  "model_max_length": 1000000000000000019884624838656,
39
- "tokenizer_class": "PreTrainedTokenizer"
40
- }
 
36
  "clean_up_tokenization_spaces": false,
37
  "extra_special_tokens": {},
38
  "model_max_length": 1000000000000000019884624838656,
39
+ "tokenizer_class": "PreTrainedTokenizerFast"
40
+ }