Upload folder using huggingface_hub
Browse files- config.json +1 -33
- model-00001-of-00010.safetensors +2 -2
- model-00002-of-00010.safetensors +2 -2
- model-00003-of-00010.safetensors +2 -2
- model-00004-of-00010.safetensors +2 -2
- model-00005-of-00010.safetensors +2 -2
- model-00006-of-00010.safetensors +2 -2
- model-00007-of-00010.safetensors +2 -2
- model-00008-of-00010.safetensors +2 -2
- model-00009-of-00010.safetensors +2 -2
- model-00010-of-00010.safetensors +2 -2
- model.safetensors.index.json +220 -156
- tokenizer_config.json +3 -3
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/model/mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 3 |
"architectures": [
|
| 4 |
"MixtralForCausalLM"
|
| 5 |
],
|
|
@@ -30,69 +30,37 @@
|
|
| 30 |
"weight_merge_groups": null
|
| 31 |
},
|
| 32 |
"ignored_layers": [
|
| 33 |
-
"model.layers.0.self_attn.o_proj",
|
| 34 |
"model.layers.0.block_sparse_moe.gate",
|
| 35 |
-
"model.layers.1.self_attn.o_proj",
|
| 36 |
"model.layers.1.block_sparse_moe.gate",
|
| 37 |
-
"model.layers.2.self_attn.o_proj",
|
| 38 |
"model.layers.2.block_sparse_moe.gate",
|
| 39 |
-
"model.layers.3.self_attn.o_proj",
|
| 40 |
"model.layers.3.block_sparse_moe.gate",
|
| 41 |
-
"model.layers.4.self_attn.o_proj",
|
| 42 |
"model.layers.4.block_sparse_moe.gate",
|
| 43 |
-
"model.layers.5.self_attn.o_proj",
|
| 44 |
"model.layers.5.block_sparse_moe.gate",
|
| 45 |
-
"model.layers.6.self_attn.o_proj",
|
| 46 |
"model.layers.6.block_sparse_moe.gate",
|
| 47 |
-
"model.layers.7.self_attn.o_proj",
|
| 48 |
"model.layers.7.block_sparse_moe.gate",
|
| 49 |
-
"model.layers.8.self_attn.o_proj",
|
| 50 |
"model.layers.8.block_sparse_moe.gate",
|
| 51 |
-
"model.layers.9.self_attn.o_proj",
|
| 52 |
"model.layers.9.block_sparse_moe.gate",
|
| 53 |
-
"model.layers.10.self_attn.o_proj",
|
| 54 |
"model.layers.10.block_sparse_moe.gate",
|
| 55 |
-
"model.layers.11.self_attn.o_proj",
|
| 56 |
"model.layers.11.block_sparse_moe.gate",
|
| 57 |
-
"model.layers.12.self_attn.o_proj",
|
| 58 |
"model.layers.12.block_sparse_moe.gate",
|
| 59 |
-
"model.layers.13.self_attn.o_proj",
|
| 60 |
"model.layers.13.block_sparse_moe.gate",
|
| 61 |
-
"model.layers.14.self_attn.o_proj",
|
| 62 |
"model.layers.14.block_sparse_moe.gate",
|
| 63 |
-
"model.layers.15.self_attn.o_proj",
|
| 64 |
"model.layers.15.block_sparse_moe.gate",
|
| 65 |
-
"model.layers.16.self_attn.o_proj",
|
| 66 |
"model.layers.16.block_sparse_moe.gate",
|
| 67 |
-
"model.layers.17.self_attn.o_proj",
|
| 68 |
"model.layers.17.block_sparse_moe.gate",
|
| 69 |
-
"model.layers.18.self_attn.o_proj",
|
| 70 |
"model.layers.18.block_sparse_moe.gate",
|
| 71 |
-
"model.layers.19.self_attn.o_proj",
|
| 72 |
"model.layers.19.block_sparse_moe.gate",
|
| 73 |
-
"model.layers.20.self_attn.o_proj",
|
| 74 |
"model.layers.20.block_sparse_moe.gate",
|
| 75 |
-
"model.layers.21.self_attn.o_proj",
|
| 76 |
"model.layers.21.block_sparse_moe.gate",
|
| 77 |
-
"model.layers.22.self_attn.o_proj",
|
| 78 |
"model.layers.22.block_sparse_moe.gate",
|
| 79 |
-
"model.layers.23.self_attn.o_proj",
|
| 80 |
"model.layers.23.block_sparse_moe.gate",
|
| 81 |
-
"model.layers.24.self_attn.o_proj",
|
| 82 |
"model.layers.24.block_sparse_moe.gate",
|
| 83 |
-
"model.layers.25.self_attn.o_proj",
|
| 84 |
"model.layers.25.block_sparse_moe.gate",
|
| 85 |
-
"model.layers.26.self_attn.o_proj",
|
| 86 |
"model.layers.26.block_sparse_moe.gate",
|
| 87 |
-
"model.layers.27.self_attn.o_proj",
|
| 88 |
"model.layers.27.block_sparse_moe.gate",
|
| 89 |
-
"model.layers.28.self_attn.o_proj",
|
| 90 |
"model.layers.28.block_sparse_moe.gate",
|
| 91 |
-
"model.layers.29.self_attn.o_proj",
|
| 92 |
"model.layers.29.block_sparse_moe.gate",
|
| 93 |
-
"model.layers.30.self_attn.o_proj",
|
| 94 |
"model.layers.30.block_sparse_moe.gate",
|
| 95 |
-
"model.layers.31.self_attn.o_proj",
|
| 96 |
"model.layers.31.block_sparse_moe.gate",
|
| 97 |
"lm_head"
|
| 98 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/model/mistralai/Mixtral-8x7B-Instruct-v0.1-MLCommons",
|
| 3 |
"architectures": [
|
| 4 |
"MixtralForCausalLM"
|
| 5 |
],
|
|
|
|
| 30 |
"weight_merge_groups": null
|
| 31 |
},
|
| 32 |
"ignored_layers": [
|
|
|
|
| 33 |
"model.layers.0.block_sparse_moe.gate",
|
|
|
|
| 34 |
"model.layers.1.block_sparse_moe.gate",
|
|
|
|
| 35 |
"model.layers.2.block_sparse_moe.gate",
|
|
|
|
| 36 |
"model.layers.3.block_sparse_moe.gate",
|
|
|
|
| 37 |
"model.layers.4.block_sparse_moe.gate",
|
|
|
|
| 38 |
"model.layers.5.block_sparse_moe.gate",
|
|
|
|
| 39 |
"model.layers.6.block_sparse_moe.gate",
|
|
|
|
| 40 |
"model.layers.7.block_sparse_moe.gate",
|
|
|
|
| 41 |
"model.layers.8.block_sparse_moe.gate",
|
|
|
|
| 42 |
"model.layers.9.block_sparse_moe.gate",
|
|
|
|
| 43 |
"model.layers.10.block_sparse_moe.gate",
|
|
|
|
| 44 |
"model.layers.11.block_sparse_moe.gate",
|
|
|
|
| 45 |
"model.layers.12.block_sparse_moe.gate",
|
|
|
|
| 46 |
"model.layers.13.block_sparse_moe.gate",
|
|
|
|
| 47 |
"model.layers.14.block_sparse_moe.gate",
|
|
|
|
| 48 |
"model.layers.15.block_sparse_moe.gate",
|
|
|
|
| 49 |
"model.layers.16.block_sparse_moe.gate",
|
|
|
|
| 50 |
"model.layers.17.block_sparse_moe.gate",
|
|
|
|
| 51 |
"model.layers.18.block_sparse_moe.gate",
|
|
|
|
| 52 |
"model.layers.19.block_sparse_moe.gate",
|
|
|
|
| 53 |
"model.layers.20.block_sparse_moe.gate",
|
|
|
|
| 54 |
"model.layers.21.block_sparse_moe.gate",
|
|
|
|
| 55 |
"model.layers.22.block_sparse_moe.gate",
|
|
|
|
| 56 |
"model.layers.23.block_sparse_moe.gate",
|
|
|
|
| 57 |
"model.layers.24.block_sparse_moe.gate",
|
|
|
|
| 58 |
"model.layers.25.block_sparse_moe.gate",
|
|
|
|
| 59 |
"model.layers.26.block_sparse_moe.gate",
|
|
|
|
| 60 |
"model.layers.27.block_sparse_moe.gate",
|
|
|
|
| 61 |
"model.layers.28.block_sparse_moe.gate",
|
|
|
|
| 62 |
"model.layers.29.block_sparse_moe.gate",
|
|
|
|
| 63 |
"model.layers.30.block_sparse_moe.gate",
|
|
|
|
| 64 |
"model.layers.31.block_sparse_moe.gate",
|
| 65 |
"lm_head"
|
| 66 |
],
|
model-00001-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ecd14ec5d85f2c066dbb7f51ef8be30e0b08a203265450b45eed2a63acb1ac8
|
| 3 |
+
size 4951724404
|
model-00002-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3aa6b0680351be68dcee66544094e9ca1639bb00809a922e1eb278d2190b23ae
|
| 3 |
+
size 4999892088
|
model-00003-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15d66cf16d3f70e368d03a4f14adde83680aab0eb8fa77cf9855df6f7c056b72
|
| 3 |
+
size 4983198624
|
model-00004-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a5200086c3543ead430588b88bd9ee6bd733204d2cdeb4a9ab33e216cb00d8
|
| 3 |
+
size 4999892344
|
model-00005-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b073cb23401fa7aed985e96fca66b69cc33f463d52afb9c7a4027bbd6b07ccf6
|
| 3 |
+
size 4999909308
|
model-00006-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02325fcd4cbeead017c342bc0dffc84611bcc1db2d174672e4534efeea72a929
|
| 3 |
+
size 4983181812
|
model-00007-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2420bbfc5134419522aefe880a730c69ac82bcb7a754cfe63857347debd4f769
|
| 3 |
+
size 4999892472
|
model-00008-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d7e3cb3975f378758fb6a74c41674d694dc60ef62ed7220494b0ef04d4ffe20
|
| 3 |
+
size 4983198840
|
model-00009-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d881a918e94bb35844a2373251643d2c75940078ec9bb7fc345f0361e2434c0
|
| 3 |
+
size 4999892424
|
model-00010-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:874b967850a292cc1c0acbbcce7fb48286bea2346ed46567aead9b47948e9ad9
|
| 3 |
+
size 2065815212
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00010-of-00010.safetensors",
|
|
@@ -84,7 +84,9 @@
|
|
| 84 |
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 85 |
"model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 86 |
"model.layers.0.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 87 |
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
|
| 88 |
"model.layers.0.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 89 |
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 90 |
"model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
@@ -171,7 +173,9 @@
|
|
| 171 |
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 172 |
"model.layers.1.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 173 |
"model.layers.1.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 174 |
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
|
| 175 |
"model.layers.1.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 176 |
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 177 |
"model.layers.1.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
@@ -179,12 +183,12 @@
|
|
| 179 |
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
| 180 |
"model.layers.1.self_attn.v_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 181 |
"model.layers.1.self_attn.v_scale": "model-00001-of-00010.safetensors",
|
| 182 |
-
"model.layers.10.block_sparse_moe.experts.0.w1.input_scale": "model-
|
| 183 |
-
"model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-
|
| 184 |
-
"model.layers.10.block_sparse_moe.experts.0.w1.weight_scale": "model-
|
| 185 |
-
"model.layers.10.block_sparse_moe.experts.0.w2.input_scale": "model-
|
| 186 |
-
"model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-
|
| 187 |
-
"model.layers.10.block_sparse_moe.experts.0.w2.weight_scale": "model-
|
| 188 |
"model.layers.10.block_sparse_moe.experts.0.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 189 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00010.safetensors",
|
| 190 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight_scale": "model-00004-of-00010.safetensors",
|
|
@@ -251,20 +255,22 @@
|
|
| 251 |
"model.layers.10.block_sparse_moe.experts.7.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 252 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00010.safetensors",
|
| 253 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
|
| 254 |
-
"model.layers.10.block_sparse_moe.gate.weight": "model-
|
| 255 |
"model.layers.10.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
| 256 |
"model.layers.10.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
| 257 |
-
"model.layers.10.self_attn.k_proj.input_scale": "model-
|
| 258 |
-
"model.layers.10.self_attn.k_proj.weight": "model-
|
| 259 |
-
"model.layers.10.self_attn.k_proj.weight_scale": "model-
|
| 260 |
"model.layers.10.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
| 261 |
-
"model.layers.10.self_attn.o_proj.
|
| 262 |
-
"model.layers.10.self_attn.
|
| 263 |
-
"model.layers.10.self_attn.
|
| 264 |
-
"model.layers.10.self_attn.q_proj.
|
| 265 |
-
"model.layers.10.self_attn.
|
| 266 |
-
"model.layers.10.self_attn.
|
| 267 |
-
"model.layers.10.self_attn.v_proj.
|
|
|
|
|
|
|
| 268 |
"model.layers.10.self_attn.v_scale": "model-00003-of-00010.safetensors",
|
| 269 |
"model.layers.11.block_sparse_moe.experts.0.w1.input_scale": "model-00004-of-00010.safetensors",
|
| 270 |
"model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00010.safetensors",
|
|
@@ -345,7 +351,9 @@
|
|
| 345 |
"model.layers.11.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
| 346 |
"model.layers.11.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 347 |
"model.layers.11.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 348 |
"model.layers.11.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
|
| 349 |
"model.layers.11.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 350 |
"model.layers.11.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
| 351 |
"model.layers.11.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
@@ -432,7 +440,9 @@
|
|
| 432 |
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
| 433 |
"model.layers.12.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 434 |
"model.layers.12.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 435 |
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
|
| 436 |
"model.layers.12.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 437 |
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
| 438 |
"model.layers.12.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
@@ -467,18 +477,18 @@
|
|
| 467 |
"model.layers.13.block_sparse_moe.experts.2.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 468 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00010.safetensors",
|
| 469 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight_scale": "model-00004-of-00010.safetensors",
|
| 470 |
-
"model.layers.13.block_sparse_moe.experts.3.w1.input_scale": "model-
|
| 471 |
-
"model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-
|
| 472 |
-
"model.layers.13.block_sparse_moe.experts.3.w1.weight_scale": "model-
|
| 473 |
-
"model.layers.13.block_sparse_moe.experts.3.w2.input_scale": "model-
|
| 474 |
-
"model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-
|
| 475 |
-
"model.layers.13.block_sparse_moe.experts.3.w2.weight_scale": "model-
|
| 476 |
-
"model.layers.13.block_sparse_moe.experts.3.w3.input_scale": "model-
|
| 477 |
-
"model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-
|
| 478 |
-
"model.layers.13.block_sparse_moe.experts.3.w3.weight_scale": "model-
|
| 479 |
-
"model.layers.13.block_sparse_moe.experts.4.w1.input_scale": "model-
|
| 480 |
-
"model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-
|
| 481 |
-
"model.layers.13.block_sparse_moe.experts.4.w1.weight_scale": "model-
|
| 482 |
"model.layers.13.block_sparse_moe.experts.4.w2.input_scale": "model-00005-of-00010.safetensors",
|
| 483 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors",
|
| 484 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight_scale": "model-00005-of-00010.safetensors",
|
|
@@ -519,7 +529,9 @@
|
|
| 519 |
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
| 520 |
"model.layers.13.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 521 |
"model.layers.13.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 522 |
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
|
| 523 |
"model.layers.13.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 524 |
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
| 525 |
"model.layers.13.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
@@ -606,7 +618,9 @@
|
|
| 606 |
"model.layers.14.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
| 607 |
"model.layers.14.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 608 |
"model.layers.14.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 609 |
"model.layers.14.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
|
|
|
| 610 |
"model.layers.14.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 611 |
"model.layers.14.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
| 612 |
"model.layers.14.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
@@ -693,7 +707,9 @@
|
|
| 693 |
"model.layers.15.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
| 694 |
"model.layers.15.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 695 |
"model.layers.15.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 696 |
"model.layers.15.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
|
|
|
| 697 |
"model.layers.15.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 698 |
"model.layers.15.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
| 699 |
"model.layers.15.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
@@ -758,29 +774,31 @@
|
|
| 758 |
"model.layers.16.block_sparse_moe.experts.6.w1.input_scale": "model-00005-of-00010.safetensors",
|
| 759 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00010.safetensors",
|
| 760 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight_scale": "model-00005-of-00010.safetensors",
|
| 761 |
-
"model.layers.16.block_sparse_moe.experts.6.w2.input_scale": "model-
|
| 762 |
-
"model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-
|
| 763 |
-
"model.layers.16.block_sparse_moe.experts.6.w2.weight_scale": "model-
|
| 764 |
-
"model.layers.16.block_sparse_moe.experts.6.w3.input_scale": "model-
|
| 765 |
-
"model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-
|
| 766 |
-
"model.layers.16.block_sparse_moe.experts.6.w3.weight_scale": "model-
|
| 767 |
-
"model.layers.16.block_sparse_moe.experts.7.w1.input_scale": "model-
|
| 768 |
-
"model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-
|
| 769 |
-
"model.layers.16.block_sparse_moe.experts.7.w1.weight_scale": "model-
|
| 770 |
-
"model.layers.16.block_sparse_moe.experts.7.w2.input_scale": "model-
|
| 771 |
-
"model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-
|
| 772 |
-
"model.layers.16.block_sparse_moe.experts.7.w2.weight_scale": "model-
|
| 773 |
-
"model.layers.16.block_sparse_moe.experts.7.w3.input_scale": "model-
|
| 774 |
-
"model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-
|
| 775 |
-
"model.layers.16.block_sparse_moe.experts.7.w3.weight_scale": "model-
|
| 776 |
"model.layers.16.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
|
| 777 |
-
"model.layers.16.input_layernorm.weight": "model-
|
| 778 |
-
"model.layers.16.post_attention_layernorm.weight": "model-
|
| 779 |
"model.layers.16.self_attn.k_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 780 |
"model.layers.16.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
| 781 |
"model.layers.16.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 782 |
"model.layers.16.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 783 |
"model.layers.16.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
|
|
|
| 784 |
"model.layers.16.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 785 |
"model.layers.16.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
| 786 |
"model.layers.16.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
@@ -866,15 +884,17 @@
|
|
| 866 |
"model.layers.17.self_attn.k_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 867 |
"model.layers.17.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 868 |
"model.layers.17.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 869 |
-
"model.layers.17.self_attn.k_scale": "model-
|
|
|
|
| 870 |
"model.layers.17.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
|
| 871 |
"model.layers.17.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 872 |
"model.layers.17.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 873 |
"model.layers.17.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 874 |
"model.layers.17.self_attn.v_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 875 |
"model.layers.17.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
| 876 |
"model.layers.17.self_attn.v_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 877 |
-
"model.layers.17.self_attn.v_scale": "model-
|
| 878 |
"model.layers.18.block_sparse_moe.experts.0.w1.input_scale": "model-00006-of-00010.safetensors",
|
| 879 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00010.safetensors",
|
| 880 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight_scale": "model-00006-of-00010.safetensors",
|
|
@@ -954,7 +974,9 @@
|
|
| 954 |
"model.layers.18.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 955 |
"model.layers.18.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 956 |
"model.layers.18.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 957 |
"model.layers.18.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
|
| 958 |
"model.layers.18.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 959 |
"model.layers.18.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 960 |
"model.layers.18.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
@@ -1041,7 +1063,9 @@
|
|
| 1041 |
"model.layers.19.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 1042 |
"model.layers.19.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 1043 |
"model.layers.19.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 1044 |
"model.layers.19.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
|
| 1045 |
"model.layers.19.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 1046 |
"model.layers.19.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 1047 |
"model.layers.19.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
@@ -1128,7 +1152,9 @@
|
|
| 1128 |
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 1129 |
"model.layers.2.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 1130 |
"model.layers.2.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 1131 |
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
|
| 1132 |
"model.layers.2.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 1133 |
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 1134 |
"model.layers.2.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
@@ -1148,24 +1174,24 @@
|
|
| 1148 |
"model.layers.20.block_sparse_moe.experts.1.w1.input_scale": "model-00006-of-00010.safetensors",
|
| 1149 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00010.safetensors",
|
| 1150 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight_scale": "model-00006-of-00010.safetensors",
|
| 1151 |
-
"model.layers.20.block_sparse_moe.experts.1.w2.input_scale": "model-
|
| 1152 |
-
"model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-
|
| 1153 |
-
"model.layers.20.block_sparse_moe.experts.1.w2.weight_scale": "model-
|
| 1154 |
-
"model.layers.20.block_sparse_moe.experts.1.w3.input_scale": "model-
|
| 1155 |
-
"model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-
|
| 1156 |
-
"model.layers.20.block_sparse_moe.experts.1.w3.weight_scale": "model-
|
| 1157 |
-
"model.layers.20.block_sparse_moe.experts.2.w1.input_scale": "model-
|
| 1158 |
-
"model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-
|
| 1159 |
-
"model.layers.20.block_sparse_moe.experts.2.w1.weight_scale": "model-
|
| 1160 |
-
"model.layers.20.block_sparse_moe.experts.2.w2.input_scale": "model-
|
| 1161 |
-
"model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-
|
| 1162 |
-
"model.layers.20.block_sparse_moe.experts.2.w2.weight_scale": "model-
|
| 1163 |
-
"model.layers.20.block_sparse_moe.experts.2.w3.input_scale": "model-
|
| 1164 |
-
"model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-
|
| 1165 |
-
"model.layers.20.block_sparse_moe.experts.2.w3.weight_scale": "model-
|
| 1166 |
-
"model.layers.20.block_sparse_moe.experts.3.w1.input_scale": "model-
|
| 1167 |
-
"model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-
|
| 1168 |
-
"model.layers.20.block_sparse_moe.experts.3.w1.weight_scale": "model-
|
| 1169 |
"model.layers.20.block_sparse_moe.experts.3.w2.input_scale": "model-00007-of-00010.safetensors",
|
| 1170 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00010.safetensors",
|
| 1171 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight_scale": "model-00007-of-00010.safetensors",
|
|
@@ -1215,7 +1241,9 @@
|
|
| 1215 |
"model.layers.20.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 1216 |
"model.layers.20.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 1217 |
"model.layers.20.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 1218 |
"model.layers.20.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
|
| 1219 |
"model.layers.20.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 1220 |
"model.layers.20.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 1221 |
"model.layers.20.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
@@ -1302,7 +1330,9 @@
|
|
| 1302 |
"model.layers.21.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
| 1303 |
"model.layers.21.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1304 |
"model.layers.21.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1305 |
"model.layers.21.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
|
| 1306 |
"model.layers.21.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1307 |
"model.layers.21.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
| 1308 |
"model.layers.21.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
@@ -1389,7 +1419,9 @@
|
|
| 1389 |
"model.layers.22.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
| 1390 |
"model.layers.22.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1391 |
"model.layers.22.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1392 |
"model.layers.22.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
|
| 1393 |
"model.layers.22.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1394 |
"model.layers.22.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
| 1395 |
"model.layers.22.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
@@ -1439,27 +1471,27 @@
|
|
| 1439 |
"model.layers.23.block_sparse_moe.experts.4.w2.input_scale": "model-00007-of-00010.safetensors",
|
| 1440 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00010.safetensors",
|
| 1441 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight_scale": "model-00007-of-00010.safetensors",
|
| 1442 |
-
"model.layers.23.block_sparse_moe.experts.4.w3.input_scale": "model-
|
| 1443 |
-
"model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-
|
| 1444 |
-
"model.layers.23.block_sparse_moe.experts.4.w3.weight_scale": "model-
|
| 1445 |
-
"model.layers.23.block_sparse_moe.experts.5.w1.input_scale": "model-
|
| 1446 |
-
"model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-
|
| 1447 |
-
"model.layers.23.block_sparse_moe.experts.5.w1.weight_scale": "model-
|
| 1448 |
-
"model.layers.23.block_sparse_moe.experts.5.w2.input_scale": "model-
|
| 1449 |
-
"model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-
|
| 1450 |
-
"model.layers.23.block_sparse_moe.experts.5.w2.weight_scale": "model-
|
| 1451 |
-
"model.layers.23.block_sparse_moe.experts.5.w3.input_scale": "model-
|
| 1452 |
-
"model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-
|
| 1453 |
-
"model.layers.23.block_sparse_moe.experts.5.w3.weight_scale": "model-
|
| 1454 |
-
"model.layers.23.block_sparse_moe.experts.6.w1.input_scale": "model-
|
| 1455 |
-
"model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-
|
| 1456 |
-
"model.layers.23.block_sparse_moe.experts.6.w1.weight_scale": "model-
|
| 1457 |
-
"model.layers.23.block_sparse_moe.experts.6.w2.input_scale": "model-
|
| 1458 |
-
"model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-
|
| 1459 |
-
"model.layers.23.block_sparse_moe.experts.6.w2.weight_scale": "model-
|
| 1460 |
-
"model.layers.23.block_sparse_moe.experts.6.w3.input_scale": "model-
|
| 1461 |
-
"model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-
|
| 1462 |
-
"model.layers.23.block_sparse_moe.experts.6.w3.weight_scale": "model-
|
| 1463 |
"model.layers.23.block_sparse_moe.experts.7.w1.input_scale": "model-00008-of-00010.safetensors",
|
| 1464 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors",
|
| 1465 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight_scale": "model-00008-of-00010.safetensors",
|
|
@@ -1476,7 +1508,9 @@
|
|
| 1476 |
"model.layers.23.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
| 1477 |
"model.layers.23.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1478 |
"model.layers.23.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1479 |
"model.layers.23.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
|
| 1480 |
"model.layers.23.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1481 |
"model.layers.23.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
| 1482 |
"model.layers.23.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
@@ -1563,7 +1597,9 @@
|
|
| 1563 |
"model.layers.24.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1564 |
"model.layers.24.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1565 |
"model.layers.24.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1566 |
"model.layers.24.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
|
|
|
| 1567 |
"model.layers.24.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1568 |
"model.layers.24.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1569 |
"model.layers.24.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
@@ -1650,7 +1686,9 @@
|
|
| 1650 |
"model.layers.25.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1651 |
"model.layers.25.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1652 |
"model.layers.25.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1653 |
"model.layers.25.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
|
|
|
| 1654 |
"model.layers.25.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1655 |
"model.layers.25.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1656 |
"model.layers.25.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
@@ -1737,7 +1775,9 @@
|
|
| 1737 |
"model.layers.26.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1738 |
"model.layers.26.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1739 |
"model.layers.26.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1740 |
"model.layers.26.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
|
|
|
| 1741 |
"model.layers.26.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1742 |
"model.layers.26.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1743 |
"model.layers.26.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
@@ -1745,27 +1785,27 @@
|
|
| 1745 |
"model.layers.26.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
| 1746 |
"model.layers.26.self_attn.v_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1747 |
"model.layers.26.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
| 1748 |
-
"model.layers.27.block_sparse_moe.experts.0.w1.input_scale": "model-
|
| 1749 |
-
"model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-
|
| 1750 |
-
"model.layers.27.block_sparse_moe.experts.0.w1.weight_scale": "model-
|
| 1751 |
-
"model.layers.27.block_sparse_moe.experts.0.w2.input_scale": "model-
|
| 1752 |
-
"model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-
|
| 1753 |
-
"model.layers.27.block_sparse_moe.experts.0.w2.weight_scale": "model-
|
| 1754 |
-
"model.layers.27.block_sparse_moe.experts.0.w3.input_scale": "model-
|
| 1755 |
-
"model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-
|
| 1756 |
-
"model.layers.27.block_sparse_moe.experts.0.w3.weight_scale": "model-
|
| 1757 |
-
"model.layers.27.block_sparse_moe.experts.1.w1.input_scale": "model-
|
| 1758 |
-
"model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-
|
| 1759 |
-
"model.layers.27.block_sparse_moe.experts.1.w1.weight_scale": "model-
|
| 1760 |
-
"model.layers.27.block_sparse_moe.experts.1.w2.input_scale": "model-
|
| 1761 |
-
"model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-
|
| 1762 |
-
"model.layers.27.block_sparse_moe.experts.1.w2.weight_scale": "model-
|
| 1763 |
-
"model.layers.27.block_sparse_moe.experts.1.w3.input_scale": "model-
|
| 1764 |
-
"model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-
|
| 1765 |
-
"model.layers.27.block_sparse_moe.experts.1.w3.weight_scale": "model-
|
| 1766 |
-
"model.layers.27.block_sparse_moe.experts.2.w1.input_scale": "model-
|
| 1767 |
-
"model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-
|
| 1768 |
-
"model.layers.27.block_sparse_moe.experts.2.w1.weight_scale": "model-
|
| 1769 |
"model.layers.27.block_sparse_moe.experts.2.w2.input_scale": "model-00009-of-00010.safetensors",
|
| 1770 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00010.safetensors",
|
| 1771 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight_scale": "model-00009-of-00010.safetensors",
|
|
@@ -1817,20 +1857,22 @@
|
|
| 1817 |
"model.layers.27.block_sparse_moe.experts.7.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 1818 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00010.safetensors",
|
| 1819 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 1820 |
-
"model.layers.27.block_sparse_moe.gate.weight": "model-
|
| 1821 |
"model.layers.27.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 1822 |
"model.layers.27.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 1823 |
-
"model.layers.27.self_attn.k_proj.input_scale": "model-
|
| 1824 |
-
"model.layers.27.self_attn.k_proj.weight": "model-
|
| 1825 |
-
"model.layers.27.self_attn.k_proj.weight_scale": "model-
|
| 1826 |
"model.layers.27.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
| 1827 |
-
"model.layers.27.self_attn.o_proj.
|
| 1828 |
-
"model.layers.27.self_attn.
|
| 1829 |
-
"model.layers.27.self_attn.
|
| 1830 |
-
"model.layers.27.self_attn.q_proj.
|
| 1831 |
-
"model.layers.27.self_attn.
|
| 1832 |
-
"model.layers.27.self_attn.
|
| 1833 |
-
"model.layers.27.self_attn.v_proj.
|
|
|
|
|
|
|
| 1834 |
"model.layers.27.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
| 1835 |
"model.layers.28.block_sparse_moe.experts.0.w1.input_scale": "model-00009-of-00010.safetensors",
|
| 1836 |
"model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors",
|
|
@@ -1911,7 +1953,9 @@
|
|
| 1911 |
"model.layers.28.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
| 1912 |
"model.layers.28.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 1913 |
"model.layers.28.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 1914 |
"model.layers.28.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
|
| 1915 |
"model.layers.28.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 1916 |
"model.layers.28.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
| 1917 |
"model.layers.28.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
@@ -1998,7 +2042,9 @@
|
|
| 1998 |
"model.layers.29.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
| 1999 |
"model.layers.29.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 2000 |
"model.layers.29.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 2001 |
"model.layers.29.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
|
| 2002 |
"model.layers.29.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 2003 |
"model.layers.29.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
| 2004 |
"model.layers.29.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
@@ -2018,9 +2064,9 @@
|
|
| 2018 |
"model.layers.3.block_sparse_moe.experts.1.w1.input_scale": "model-00001-of-00010.safetensors",
|
| 2019 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00010.safetensors",
|
| 2020 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight_scale": "model-00001-of-00010.safetensors",
|
| 2021 |
-
"model.layers.3.block_sparse_moe.experts.1.w2.input_scale": "model-
|
| 2022 |
-
"model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-
|
| 2023 |
-
"model.layers.3.block_sparse_moe.experts.1.w2.weight_scale": "model-
|
| 2024 |
"model.layers.3.block_sparse_moe.experts.1.w3.input_scale": "model-00002-of-00010.safetensors",
|
| 2025 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors",
|
| 2026 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight_scale": "model-00002-of-00010.safetensors",
|
|
@@ -2085,7 +2131,9 @@
|
|
| 2085 |
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 2086 |
"model.layers.3.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 2087 |
"model.layers.3.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 2088 |
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
|
| 2089 |
"model.layers.3.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 2090 |
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 2091 |
"model.layers.3.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
@@ -2120,33 +2168,33 @@
|
|
| 2120 |
"model.layers.30.block_sparse_moe.experts.2.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 2121 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00010.safetensors",
|
| 2122 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 2123 |
-
"model.layers.30.block_sparse_moe.experts.3.w1.input_scale": "model-
|
| 2124 |
-
"model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-
|
| 2125 |
-
"model.layers.30.block_sparse_moe.experts.3.w1.weight_scale": "model-
|
| 2126 |
-
"model.layers.30.block_sparse_moe.experts.3.w2.input_scale": "model-
|
| 2127 |
-
"model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-
|
| 2128 |
-
"model.layers.30.block_sparse_moe.experts.3.w2.weight_scale": "model-
|
| 2129 |
-
"model.layers.30.block_sparse_moe.experts.3.w3.input_scale": "model-
|
| 2130 |
-
"model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-
|
| 2131 |
-
"model.layers.30.block_sparse_moe.experts.3.w3.weight_scale": "model-
|
| 2132 |
-
"model.layers.30.block_sparse_moe.experts.4.w1.input_scale": "model-
|
| 2133 |
-
"model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-
|
| 2134 |
-
"model.layers.30.block_sparse_moe.experts.4.w1.weight_scale": "model-
|
| 2135 |
-
"model.layers.30.block_sparse_moe.experts.4.w2.input_scale": "model-
|
| 2136 |
-
"model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-
|
| 2137 |
-
"model.layers.30.block_sparse_moe.experts.4.w2.weight_scale": "model-
|
| 2138 |
-
"model.layers.30.block_sparse_moe.experts.4.w3.input_scale": "model-
|
| 2139 |
-
"model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-
|
| 2140 |
-
"model.layers.30.block_sparse_moe.experts.4.w3.weight_scale": "model-
|
| 2141 |
-
"model.layers.30.block_sparse_moe.experts.5.w1.input_scale": "model-
|
| 2142 |
-
"model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-
|
| 2143 |
-
"model.layers.30.block_sparse_moe.experts.5.w1.weight_scale": "model-
|
| 2144 |
-
"model.layers.30.block_sparse_moe.experts.5.w2.input_scale": "model-
|
| 2145 |
-
"model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-
|
| 2146 |
-
"model.layers.30.block_sparse_moe.experts.5.w2.weight_scale": "model-
|
| 2147 |
-
"model.layers.30.block_sparse_moe.experts.5.w3.input_scale": "model-
|
| 2148 |
-
"model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-
|
| 2149 |
-
"model.layers.30.block_sparse_moe.experts.5.w3.weight_scale": "model-
|
| 2150 |
"model.layers.30.block_sparse_moe.experts.6.w1.input_scale": "model-00010-of-00010.safetensors",
|
| 2151 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00010.safetensors",
|
| 2152 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight_scale": "model-00010-of-00010.safetensors",
|
|
@@ -2172,7 +2220,9 @@
|
|
| 2172 |
"model.layers.30.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
| 2173 |
"model.layers.30.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 2174 |
"model.layers.30.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 2175 |
"model.layers.30.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
|
| 2176 |
"model.layers.30.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 2177 |
"model.layers.30.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
| 2178 |
"model.layers.30.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
@@ -2259,7 +2309,9 @@
|
|
| 2259 |
"model.layers.31.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
| 2260 |
"model.layers.31.self_attn.k_proj.weight_scale": "model-00010-of-00010.safetensors",
|
| 2261 |
"model.layers.31.self_attn.k_scale": "model-00010-of-00010.safetensors",
|
|
|
|
| 2262 |
"model.layers.31.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
|
|
|
| 2263 |
"model.layers.31.self_attn.q_proj.input_scale": "model-00010-of-00010.safetensors",
|
| 2264 |
"model.layers.31.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
| 2265 |
"model.layers.31.self_attn.q_proj.weight_scale": "model-00010-of-00010.safetensors",
|
|
@@ -2346,7 +2398,9 @@
|
|
| 2346 |
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
| 2347 |
"model.layers.4.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2348 |
"model.layers.4.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2349 |
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
|
|
|
| 2350 |
"model.layers.4.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2351 |
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
| 2352 |
"model.layers.4.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
@@ -2433,7 +2487,9 @@
|
|
| 2433 |
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
| 2434 |
"model.layers.5.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2435 |
"model.layers.5.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2436 |
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
|
|
|
| 2437 |
"model.layers.5.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2438 |
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
| 2439 |
"model.layers.5.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
@@ -2483,12 +2539,12 @@
|
|
| 2483 |
"model.layers.6.block_sparse_moe.experts.4.w2.input_scale": "model-00002-of-00010.safetensors",
|
| 2484 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00010.safetensors",
|
| 2485 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight_scale": "model-00002-of-00010.safetensors",
|
| 2486 |
-
"model.layers.6.block_sparse_moe.experts.4.w3.input_scale": "model-
|
| 2487 |
-
"model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-
|
| 2488 |
-
"model.layers.6.block_sparse_moe.experts.4.w3.weight_scale": "model-
|
| 2489 |
-
"model.layers.6.block_sparse_moe.experts.5.w1.input_scale": "model-
|
| 2490 |
-
"model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-
|
| 2491 |
-
"model.layers.6.block_sparse_moe.experts.5.w1.weight_scale": "model-
|
| 2492 |
"model.layers.6.block_sparse_moe.experts.5.w2.input_scale": "model-00003-of-00010.safetensors",
|
| 2493 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00010.safetensors",
|
| 2494 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight_scale": "model-00003-of-00010.safetensors",
|
|
@@ -2520,7 +2576,9 @@
|
|
| 2520 |
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
| 2521 |
"model.layers.6.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2522 |
"model.layers.6.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2523 |
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
|
|
|
| 2524 |
"model.layers.6.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2525 |
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
| 2526 |
"model.layers.6.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
@@ -2607,7 +2665,9 @@
|
|
| 2607 |
"model.layers.7.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 2608 |
"model.layers.7.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2609 |
"model.layers.7.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 2610 |
"model.layers.7.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
|
| 2611 |
"model.layers.7.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2612 |
"model.layers.7.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 2613 |
"model.layers.7.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
@@ -2694,7 +2754,9 @@
|
|
| 2694 |
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 2695 |
"model.layers.8.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2696 |
"model.layers.8.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 2697 |
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
|
| 2698 |
"model.layers.8.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2699 |
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 2700 |
"model.layers.8.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
@@ -2781,7 +2843,9 @@
|
|
| 2781 |
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 2782 |
"model.layers.9.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2783 |
"model.layers.9.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 2784 |
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
|
| 2785 |
"model.layers.9.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2786 |
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 2787 |
"model.layers.9.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 46966255232
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00010-of-00010.safetensors",
|
|
|
|
| 84 |
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 85 |
"model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 86 |
"model.layers.0.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
| 87 |
+
"model.layers.0.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 88 |
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
| 89 |
+
"model.layers.0.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 90 |
"model.layers.0.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 91 |
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 92 |
"model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 173 |
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 174 |
"model.layers.1.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 175 |
"model.layers.1.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
| 176 |
+
"model.layers.1.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 177 |
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
| 178 |
+
"model.layers.1.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 179 |
"model.layers.1.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 180 |
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 181 |
"model.layers.1.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 183 |
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
| 184 |
"model.layers.1.self_attn.v_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 185 |
"model.layers.1.self_attn.v_scale": "model-00001-of-00010.safetensors",
|
| 186 |
+
"model.layers.10.block_sparse_moe.experts.0.w1.input_scale": "model-00003-of-00010.safetensors",
|
| 187 |
+
"model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00010.safetensors",
|
| 188 |
+
"model.layers.10.block_sparse_moe.experts.0.w1.weight_scale": "model-00003-of-00010.safetensors",
|
| 189 |
+
"model.layers.10.block_sparse_moe.experts.0.w2.input_scale": "model-00003-of-00010.safetensors",
|
| 190 |
+
"model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00010.safetensors",
|
| 191 |
+
"model.layers.10.block_sparse_moe.experts.0.w2.weight_scale": "model-00003-of-00010.safetensors",
|
| 192 |
"model.layers.10.block_sparse_moe.experts.0.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 193 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00010.safetensors",
|
| 194 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 255 |
"model.layers.10.block_sparse_moe.experts.7.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 256 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00010.safetensors",
|
| 257 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
|
| 258 |
+
"model.layers.10.block_sparse_moe.gate.weight": "model-00003-of-00010.safetensors",
|
| 259 |
"model.layers.10.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
| 260 |
"model.layers.10.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
| 261 |
+
"model.layers.10.self_attn.k_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 262 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 263 |
+
"model.layers.10.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 264 |
"model.layers.10.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
| 265 |
+
"model.layers.10.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 266 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
| 267 |
+
"model.layers.10.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 268 |
+
"model.layers.10.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 269 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 270 |
+
"model.layers.10.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 271 |
+
"model.layers.10.self_attn.v_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 272 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
| 273 |
+
"model.layers.10.self_attn.v_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 274 |
"model.layers.10.self_attn.v_scale": "model-00003-of-00010.safetensors",
|
| 275 |
"model.layers.11.block_sparse_moe.experts.0.w1.input_scale": "model-00004-of-00010.safetensors",
|
| 276 |
"model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00010.safetensors",
|
|
|
|
| 351 |
"model.layers.11.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
| 352 |
"model.layers.11.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 353 |
"model.layers.11.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
| 354 |
+
"model.layers.11.self_attn.o_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 355 |
"model.layers.11.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
| 356 |
+
"model.layers.11.self_attn.o_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 357 |
"model.layers.11.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 358 |
"model.layers.11.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
| 359 |
"model.layers.11.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 440 |
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
| 441 |
"model.layers.12.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 442 |
"model.layers.12.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
| 443 |
+
"model.layers.12.self_attn.o_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 444 |
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
| 445 |
+
"model.layers.12.self_attn.o_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 446 |
"model.layers.12.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 447 |
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
| 448 |
"model.layers.12.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 477 |
"model.layers.13.block_sparse_moe.experts.2.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 478 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00010.safetensors",
|
| 479 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight_scale": "model-00004-of-00010.safetensors",
|
| 480 |
+
"model.layers.13.block_sparse_moe.experts.3.w1.input_scale": "model-00004-of-00010.safetensors",
|
| 481 |
+
"model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00010.safetensors",
|
| 482 |
+
"model.layers.13.block_sparse_moe.experts.3.w1.weight_scale": "model-00004-of-00010.safetensors",
|
| 483 |
+
"model.layers.13.block_sparse_moe.experts.3.w2.input_scale": "model-00004-of-00010.safetensors",
|
| 484 |
+
"model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors",
|
| 485 |
+
"model.layers.13.block_sparse_moe.experts.3.w2.weight_scale": "model-00004-of-00010.safetensors",
|
| 486 |
+
"model.layers.13.block_sparse_moe.experts.3.w3.input_scale": "model-00004-of-00010.safetensors",
|
| 487 |
+
"model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00010.safetensors",
|
| 488 |
+
"model.layers.13.block_sparse_moe.experts.3.w3.weight_scale": "model-00004-of-00010.safetensors",
|
| 489 |
+
"model.layers.13.block_sparse_moe.experts.4.w1.input_scale": "model-00004-of-00010.safetensors",
|
| 490 |
+
"model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00010.safetensors",
|
| 491 |
+
"model.layers.13.block_sparse_moe.experts.4.w1.weight_scale": "model-00004-of-00010.safetensors",
|
| 492 |
"model.layers.13.block_sparse_moe.experts.4.w2.input_scale": "model-00005-of-00010.safetensors",
|
| 493 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors",
|
| 494 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 529 |
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
| 530 |
"model.layers.13.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 531 |
"model.layers.13.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
| 532 |
+
"model.layers.13.self_attn.o_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 533 |
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
| 534 |
+
"model.layers.13.self_attn.o_proj.weight_scale": "model-00004-of-00010.safetensors",
|
| 535 |
"model.layers.13.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
| 536 |
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
| 537 |
"model.layers.13.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
|
| 618 |
"model.layers.14.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
| 619 |
"model.layers.14.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 620 |
"model.layers.14.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
| 621 |
+
"model.layers.14.self_attn.o_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 622 |
"model.layers.14.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
| 623 |
+
"model.layers.14.self_attn.o_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 624 |
"model.layers.14.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 625 |
"model.layers.14.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
| 626 |
"model.layers.14.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 707 |
"model.layers.15.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
| 708 |
"model.layers.15.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 709 |
"model.layers.15.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
| 710 |
+
"model.layers.15.self_attn.o_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 711 |
"model.layers.15.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
| 712 |
+
"model.layers.15.self_attn.o_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 713 |
"model.layers.15.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 714 |
"model.layers.15.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
| 715 |
"model.layers.15.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 774 |
"model.layers.16.block_sparse_moe.experts.6.w1.input_scale": "model-00005-of-00010.safetensors",
|
| 775 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00010.safetensors",
|
| 776 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight_scale": "model-00005-of-00010.safetensors",
|
| 777 |
+
"model.layers.16.block_sparse_moe.experts.6.w2.input_scale": "model-00005-of-00010.safetensors",
|
| 778 |
+
"model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors",
|
| 779 |
+
"model.layers.16.block_sparse_moe.experts.6.w2.weight_scale": "model-00005-of-00010.safetensors",
|
| 780 |
+
"model.layers.16.block_sparse_moe.experts.6.w3.input_scale": "model-00005-of-00010.safetensors",
|
| 781 |
+
"model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00010.safetensors",
|
| 782 |
+
"model.layers.16.block_sparse_moe.experts.6.w3.weight_scale": "model-00005-of-00010.safetensors",
|
| 783 |
+
"model.layers.16.block_sparse_moe.experts.7.w1.input_scale": "model-00005-of-00010.safetensors",
|
| 784 |
+
"model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00010.safetensors",
|
| 785 |
+
"model.layers.16.block_sparse_moe.experts.7.w1.weight_scale": "model-00005-of-00010.safetensors",
|
| 786 |
+
"model.layers.16.block_sparse_moe.experts.7.w2.input_scale": "model-00005-of-00010.safetensors",
|
| 787 |
+
"model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors",
|
| 788 |
+
"model.layers.16.block_sparse_moe.experts.7.w2.weight_scale": "model-00005-of-00010.safetensors",
|
| 789 |
+
"model.layers.16.block_sparse_moe.experts.7.w3.input_scale": "model-00005-of-00010.safetensors",
|
| 790 |
+
"model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00010.safetensors",
|
| 791 |
+
"model.layers.16.block_sparse_moe.experts.7.w3.weight_scale": "model-00005-of-00010.safetensors",
|
| 792 |
"model.layers.16.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
|
| 793 |
+
"model.layers.16.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
| 794 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
| 795 |
"model.layers.16.self_attn.k_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 796 |
"model.layers.16.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
| 797 |
"model.layers.16.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 798 |
"model.layers.16.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
| 799 |
+
"model.layers.16.self_attn.o_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 800 |
"model.layers.16.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
| 801 |
+
"model.layers.16.self_attn.o_proj.weight_scale": "model-00005-of-00010.safetensors",
|
| 802 |
"model.layers.16.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
| 803 |
"model.layers.16.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
| 804 |
"model.layers.16.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
|
| 884 |
"model.layers.17.self_attn.k_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 885 |
"model.layers.17.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 886 |
"model.layers.17.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 887 |
+
"model.layers.17.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
| 888 |
+
"model.layers.17.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 889 |
"model.layers.17.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
| 890 |
+
"model.layers.17.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 891 |
"model.layers.17.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 892 |
"model.layers.17.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 893 |
"model.layers.17.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 894 |
"model.layers.17.self_attn.v_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 895 |
"model.layers.17.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
| 896 |
"model.layers.17.self_attn.v_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 897 |
+
"model.layers.17.self_attn.v_scale": "model-00005-of-00010.safetensors",
|
| 898 |
"model.layers.18.block_sparse_moe.experts.0.w1.input_scale": "model-00006-of-00010.safetensors",
|
| 899 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00010.safetensors",
|
| 900 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 974 |
"model.layers.18.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 975 |
"model.layers.18.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 976 |
"model.layers.18.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
| 977 |
+
"model.layers.18.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 978 |
"model.layers.18.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
| 979 |
+
"model.layers.18.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 980 |
"model.layers.18.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 981 |
"model.layers.18.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 982 |
"model.layers.18.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 1063 |
"model.layers.19.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 1064 |
"model.layers.19.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 1065 |
"model.layers.19.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
| 1066 |
+
"model.layers.19.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 1067 |
"model.layers.19.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
| 1068 |
+
"model.layers.19.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 1069 |
"model.layers.19.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 1070 |
"model.layers.19.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 1071 |
"model.layers.19.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 1152 |
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 1153 |
"model.layers.2.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 1154 |
"model.layers.2.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
| 1155 |
+
"model.layers.2.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 1156 |
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
| 1157 |
+
"model.layers.2.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 1158 |
"model.layers.2.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 1159 |
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 1160 |
"model.layers.2.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 1174 |
"model.layers.20.block_sparse_moe.experts.1.w1.input_scale": "model-00006-of-00010.safetensors",
|
| 1175 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00010.safetensors",
|
| 1176 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight_scale": "model-00006-of-00010.safetensors",
|
| 1177 |
+
"model.layers.20.block_sparse_moe.experts.1.w2.input_scale": "model-00006-of-00010.safetensors",
|
| 1178 |
+
"model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors",
|
| 1179 |
+
"model.layers.20.block_sparse_moe.experts.1.w2.weight_scale": "model-00006-of-00010.safetensors",
|
| 1180 |
+
"model.layers.20.block_sparse_moe.experts.1.w3.input_scale": "model-00006-of-00010.safetensors",
|
| 1181 |
+
"model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00010.safetensors",
|
| 1182 |
+
"model.layers.20.block_sparse_moe.experts.1.w3.weight_scale": "model-00006-of-00010.safetensors",
|
| 1183 |
+
"model.layers.20.block_sparse_moe.experts.2.w1.input_scale": "model-00006-of-00010.safetensors",
|
| 1184 |
+
"model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00010.safetensors",
|
| 1185 |
+
"model.layers.20.block_sparse_moe.experts.2.w1.weight_scale": "model-00006-of-00010.safetensors",
|
| 1186 |
+
"model.layers.20.block_sparse_moe.experts.2.w2.input_scale": "model-00006-of-00010.safetensors",
|
| 1187 |
+
"model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors",
|
| 1188 |
+
"model.layers.20.block_sparse_moe.experts.2.w2.weight_scale": "model-00006-of-00010.safetensors",
|
| 1189 |
+
"model.layers.20.block_sparse_moe.experts.2.w3.input_scale": "model-00006-of-00010.safetensors",
|
| 1190 |
+
"model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00010.safetensors",
|
| 1191 |
+
"model.layers.20.block_sparse_moe.experts.2.w3.weight_scale": "model-00006-of-00010.safetensors",
|
| 1192 |
+
"model.layers.20.block_sparse_moe.experts.3.w1.input_scale": "model-00006-of-00010.safetensors",
|
| 1193 |
+
"model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00010.safetensors",
|
| 1194 |
+
"model.layers.20.block_sparse_moe.experts.3.w1.weight_scale": "model-00006-of-00010.safetensors",
|
| 1195 |
"model.layers.20.block_sparse_moe.experts.3.w2.input_scale": "model-00007-of-00010.safetensors",
|
| 1196 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00010.safetensors",
|
| 1197 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1241 |
"model.layers.20.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
| 1242 |
"model.layers.20.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 1243 |
"model.layers.20.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
| 1244 |
+
"model.layers.20.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 1245 |
"model.layers.20.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
| 1246 |
+
"model.layers.20.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
| 1247 |
"model.layers.20.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
| 1248 |
"model.layers.20.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
| 1249 |
"model.layers.20.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
|
| 1330 |
"model.layers.21.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
| 1331 |
"model.layers.21.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1332 |
"model.layers.21.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
| 1333 |
+
"model.layers.21.self_attn.o_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1334 |
"model.layers.21.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
| 1335 |
+
"model.layers.21.self_attn.o_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1336 |
"model.layers.21.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1337 |
"model.layers.21.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
| 1338 |
"model.layers.21.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1419 |
"model.layers.22.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
| 1420 |
"model.layers.22.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1421 |
"model.layers.22.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
| 1422 |
+
"model.layers.22.self_attn.o_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1423 |
"model.layers.22.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
| 1424 |
+
"model.layers.22.self_attn.o_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1425 |
"model.layers.22.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1426 |
"model.layers.22.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
| 1427 |
"model.layers.22.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1471 |
"model.layers.23.block_sparse_moe.experts.4.w2.input_scale": "model-00007-of-00010.safetensors",
|
| 1472 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00010.safetensors",
|
| 1473 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight_scale": "model-00007-of-00010.safetensors",
|
| 1474 |
+
"model.layers.23.block_sparse_moe.experts.4.w3.input_scale": "model-00007-of-00010.safetensors",
|
| 1475 |
+
"model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00010.safetensors",
|
| 1476 |
+
"model.layers.23.block_sparse_moe.experts.4.w3.weight_scale": "model-00007-of-00010.safetensors",
|
| 1477 |
+
"model.layers.23.block_sparse_moe.experts.5.w1.input_scale": "model-00007-of-00010.safetensors",
|
| 1478 |
+
"model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors",
|
| 1479 |
+
"model.layers.23.block_sparse_moe.experts.5.w1.weight_scale": "model-00007-of-00010.safetensors",
|
| 1480 |
+
"model.layers.23.block_sparse_moe.experts.5.w2.input_scale": "model-00007-of-00010.safetensors",
|
| 1481 |
+
"model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00010.safetensors",
|
| 1482 |
+
"model.layers.23.block_sparse_moe.experts.5.w2.weight_scale": "model-00007-of-00010.safetensors",
|
| 1483 |
+
"model.layers.23.block_sparse_moe.experts.5.w3.input_scale": "model-00007-of-00010.safetensors",
|
| 1484 |
+
"model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00010.safetensors",
|
| 1485 |
+
"model.layers.23.block_sparse_moe.experts.5.w3.weight_scale": "model-00007-of-00010.safetensors",
|
| 1486 |
+
"model.layers.23.block_sparse_moe.experts.6.w1.input_scale": "model-00007-of-00010.safetensors",
|
| 1487 |
+
"model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors",
|
| 1488 |
+
"model.layers.23.block_sparse_moe.experts.6.w1.weight_scale": "model-00007-of-00010.safetensors",
|
| 1489 |
+
"model.layers.23.block_sparse_moe.experts.6.w2.input_scale": "model-00007-of-00010.safetensors",
|
| 1490 |
+
"model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00010.safetensors",
|
| 1491 |
+
"model.layers.23.block_sparse_moe.experts.6.w2.weight_scale": "model-00007-of-00010.safetensors",
|
| 1492 |
+
"model.layers.23.block_sparse_moe.experts.6.w3.input_scale": "model-00007-of-00010.safetensors",
|
| 1493 |
+
"model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00010.safetensors",
|
| 1494 |
+
"model.layers.23.block_sparse_moe.experts.6.w3.weight_scale": "model-00007-of-00010.safetensors",
|
| 1495 |
"model.layers.23.block_sparse_moe.experts.7.w1.input_scale": "model-00008-of-00010.safetensors",
|
| 1496 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors",
|
| 1497 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1508 |
"model.layers.23.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
| 1509 |
"model.layers.23.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1510 |
"model.layers.23.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
| 1511 |
+
"model.layers.23.self_attn.o_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1512 |
"model.layers.23.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
| 1513 |
+
"model.layers.23.self_attn.o_proj.weight_scale": "model-00007-of-00010.safetensors",
|
| 1514 |
"model.layers.23.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
| 1515 |
"model.layers.23.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
| 1516 |
"model.layers.23.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
|
| 1597 |
"model.layers.24.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1598 |
"model.layers.24.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1599 |
"model.layers.24.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
| 1600 |
+
"model.layers.24.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1601 |
"model.layers.24.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
| 1602 |
+
"model.layers.24.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1603 |
"model.layers.24.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1604 |
"model.layers.24.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1605 |
"model.layers.24.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1686 |
"model.layers.25.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1687 |
"model.layers.25.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1688 |
"model.layers.25.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
| 1689 |
+
"model.layers.25.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1690 |
"model.layers.25.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
| 1691 |
+
"model.layers.25.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1692 |
"model.layers.25.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1693 |
"model.layers.25.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1694 |
"model.layers.25.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1775 |
"model.layers.26.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1776 |
"model.layers.26.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1777 |
"model.layers.26.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
| 1778 |
+
"model.layers.26.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1779 |
"model.layers.26.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
| 1780 |
+
"model.layers.26.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1781 |
"model.layers.26.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1782 |
"model.layers.26.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1783 |
"model.layers.26.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
|
| 1785 |
"model.layers.26.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
| 1786 |
"model.layers.26.self_attn.v_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1787 |
"model.layers.26.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
| 1788 |
+
"model.layers.27.block_sparse_moe.experts.0.w1.input_scale": "model-00008-of-00010.safetensors",
|
| 1789 |
+
"model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors",
|
| 1790 |
+
"model.layers.27.block_sparse_moe.experts.0.w1.weight_scale": "model-00008-of-00010.safetensors",
|
| 1791 |
+
"model.layers.27.block_sparse_moe.experts.0.w2.input_scale": "model-00008-of-00010.safetensors",
|
| 1792 |
+
"model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00010.safetensors",
|
| 1793 |
+
"model.layers.27.block_sparse_moe.experts.0.w2.weight_scale": "model-00008-of-00010.safetensors",
|
| 1794 |
+
"model.layers.27.block_sparse_moe.experts.0.w3.input_scale": "model-00008-of-00010.safetensors",
|
| 1795 |
+
"model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00010.safetensors",
|
| 1796 |
+
"model.layers.27.block_sparse_moe.experts.0.w3.weight_scale": "model-00008-of-00010.safetensors",
|
| 1797 |
+
"model.layers.27.block_sparse_moe.experts.1.w1.input_scale": "model-00008-of-00010.safetensors",
|
| 1798 |
+
"model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors",
|
| 1799 |
+
"model.layers.27.block_sparse_moe.experts.1.w1.weight_scale": "model-00008-of-00010.safetensors",
|
| 1800 |
+
"model.layers.27.block_sparse_moe.experts.1.w2.input_scale": "model-00008-of-00010.safetensors",
|
| 1801 |
+
"model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00010.safetensors",
|
| 1802 |
+
"model.layers.27.block_sparse_moe.experts.1.w2.weight_scale": "model-00008-of-00010.safetensors",
|
| 1803 |
+
"model.layers.27.block_sparse_moe.experts.1.w3.input_scale": "model-00008-of-00010.safetensors",
|
| 1804 |
+
"model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00010.safetensors",
|
| 1805 |
+
"model.layers.27.block_sparse_moe.experts.1.w3.weight_scale": "model-00008-of-00010.safetensors",
|
| 1806 |
+
"model.layers.27.block_sparse_moe.experts.2.w1.input_scale": "model-00008-of-00010.safetensors",
|
| 1807 |
+
"model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors",
|
| 1808 |
+
"model.layers.27.block_sparse_moe.experts.2.w1.weight_scale": "model-00008-of-00010.safetensors",
|
| 1809 |
"model.layers.27.block_sparse_moe.experts.2.w2.input_scale": "model-00009-of-00010.safetensors",
|
| 1810 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00010.safetensors",
|
| 1811 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 1857 |
"model.layers.27.block_sparse_moe.experts.7.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 1858 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00010.safetensors",
|
| 1859 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 1860 |
+
"model.layers.27.block_sparse_moe.gate.weight": "model-00008-of-00010.safetensors",
|
| 1861 |
"model.layers.27.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 1862 |
"model.layers.27.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 1863 |
+
"model.layers.27.self_attn.k_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1864 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
| 1865 |
+
"model.layers.27.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1866 |
"model.layers.27.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
| 1867 |
+
"model.layers.27.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1868 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
| 1869 |
+
"model.layers.27.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1870 |
+
"model.layers.27.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1871 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
| 1872 |
+
"model.layers.27.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1873 |
+
"model.layers.27.self_attn.v_proj.input_scale": "model-00008-of-00010.safetensors",
|
| 1874 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
| 1875 |
+
"model.layers.27.self_attn.v_proj.weight_scale": "model-00008-of-00010.safetensors",
|
| 1876 |
"model.layers.27.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
| 1877 |
"model.layers.28.block_sparse_moe.experts.0.w1.input_scale": "model-00009-of-00010.safetensors",
|
| 1878 |
"model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors",
|
|
|
|
| 1953 |
"model.layers.28.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
| 1954 |
"model.layers.28.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 1955 |
"model.layers.28.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
| 1956 |
+
"model.layers.28.self_attn.o_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 1957 |
"model.layers.28.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
| 1958 |
+
"model.layers.28.self_attn.o_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 1959 |
"model.layers.28.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 1960 |
"model.layers.28.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
| 1961 |
"model.layers.28.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 2042 |
"model.layers.29.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
| 2043 |
"model.layers.29.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 2044 |
"model.layers.29.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
| 2045 |
+
"model.layers.29.self_attn.o_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 2046 |
"model.layers.29.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
| 2047 |
+
"model.layers.29.self_attn.o_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 2048 |
"model.layers.29.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 2049 |
"model.layers.29.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
| 2050 |
"model.layers.29.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 2064 |
"model.layers.3.block_sparse_moe.experts.1.w1.input_scale": "model-00001-of-00010.safetensors",
|
| 2065 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00010.safetensors",
|
| 2066 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight_scale": "model-00001-of-00010.safetensors",
|
| 2067 |
+
"model.layers.3.block_sparse_moe.experts.1.w2.input_scale": "model-00001-of-00010.safetensors",
|
| 2068 |
+
"model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00010.safetensors",
|
| 2069 |
+
"model.layers.3.block_sparse_moe.experts.1.w2.weight_scale": "model-00001-of-00010.safetensors",
|
| 2070 |
"model.layers.3.block_sparse_moe.experts.1.w3.input_scale": "model-00002-of-00010.safetensors",
|
| 2071 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors",
|
| 2072 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2131 |
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
| 2132 |
"model.layers.3.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 2133 |
"model.layers.3.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
| 2134 |
+
"model.layers.3.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 2135 |
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
| 2136 |
+
"model.layers.3.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
| 2137 |
"model.layers.3.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
| 2138 |
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
| 2139 |
"model.layers.3.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
|
| 2168 |
"model.layers.30.block_sparse_moe.experts.2.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 2169 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00010.safetensors",
|
| 2170 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 2171 |
+
"model.layers.30.block_sparse_moe.experts.3.w1.input_scale": "model-00009-of-00010.safetensors",
|
| 2172 |
+
"model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors",
|
| 2173 |
+
"model.layers.30.block_sparse_moe.experts.3.w1.weight_scale": "model-00009-of-00010.safetensors",
|
| 2174 |
+
"model.layers.30.block_sparse_moe.experts.3.w2.input_scale": "model-00009-of-00010.safetensors",
|
| 2175 |
+
"model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00010.safetensors",
|
| 2176 |
+
"model.layers.30.block_sparse_moe.experts.3.w2.weight_scale": "model-00009-of-00010.safetensors",
|
| 2177 |
+
"model.layers.30.block_sparse_moe.experts.3.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 2178 |
+
"model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00010.safetensors",
|
| 2179 |
+
"model.layers.30.block_sparse_moe.experts.3.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 2180 |
+
"model.layers.30.block_sparse_moe.experts.4.w1.input_scale": "model-00009-of-00010.safetensors",
|
| 2181 |
+
"model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors",
|
| 2182 |
+
"model.layers.30.block_sparse_moe.experts.4.w1.weight_scale": "model-00009-of-00010.safetensors",
|
| 2183 |
+
"model.layers.30.block_sparse_moe.experts.4.w2.input_scale": "model-00009-of-00010.safetensors",
|
| 2184 |
+
"model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00010.safetensors",
|
| 2185 |
+
"model.layers.30.block_sparse_moe.experts.4.w2.weight_scale": "model-00009-of-00010.safetensors",
|
| 2186 |
+
"model.layers.30.block_sparse_moe.experts.4.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 2187 |
+
"model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00010.safetensors",
|
| 2188 |
+
"model.layers.30.block_sparse_moe.experts.4.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 2189 |
+
"model.layers.30.block_sparse_moe.experts.5.w1.input_scale": "model-00009-of-00010.safetensors",
|
| 2190 |
+
"model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors",
|
| 2191 |
+
"model.layers.30.block_sparse_moe.experts.5.w1.weight_scale": "model-00009-of-00010.safetensors",
|
| 2192 |
+
"model.layers.30.block_sparse_moe.experts.5.w2.input_scale": "model-00009-of-00010.safetensors",
|
| 2193 |
+
"model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00010.safetensors",
|
| 2194 |
+
"model.layers.30.block_sparse_moe.experts.5.w2.weight_scale": "model-00009-of-00010.safetensors",
|
| 2195 |
+
"model.layers.30.block_sparse_moe.experts.5.w3.input_scale": "model-00009-of-00010.safetensors",
|
| 2196 |
+
"model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00010.safetensors",
|
| 2197 |
+
"model.layers.30.block_sparse_moe.experts.5.w3.weight_scale": "model-00009-of-00010.safetensors",
|
| 2198 |
"model.layers.30.block_sparse_moe.experts.6.w1.input_scale": "model-00010-of-00010.safetensors",
|
| 2199 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00010.safetensors",
|
| 2200 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight_scale": "model-00010-of-00010.safetensors",
|
|
|
|
| 2220 |
"model.layers.30.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
| 2221 |
"model.layers.30.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 2222 |
"model.layers.30.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
| 2223 |
+
"model.layers.30.self_attn.o_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 2224 |
"model.layers.30.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
| 2225 |
+
"model.layers.30.self_attn.o_proj.weight_scale": "model-00009-of-00010.safetensors",
|
| 2226 |
"model.layers.30.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
| 2227 |
"model.layers.30.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
| 2228 |
"model.layers.30.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
|
| 2309 |
"model.layers.31.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
| 2310 |
"model.layers.31.self_attn.k_proj.weight_scale": "model-00010-of-00010.safetensors",
|
| 2311 |
"model.layers.31.self_attn.k_scale": "model-00010-of-00010.safetensors",
|
| 2312 |
+
"model.layers.31.self_attn.o_proj.input_scale": "model-00010-of-00010.safetensors",
|
| 2313 |
"model.layers.31.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
| 2314 |
+
"model.layers.31.self_attn.o_proj.weight_scale": "model-00010-of-00010.safetensors",
|
| 2315 |
"model.layers.31.self_attn.q_proj.input_scale": "model-00010-of-00010.safetensors",
|
| 2316 |
"model.layers.31.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
| 2317 |
"model.layers.31.self_attn.q_proj.weight_scale": "model-00010-of-00010.safetensors",
|
|
|
|
| 2398 |
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
| 2399 |
"model.layers.4.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2400 |
"model.layers.4.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
| 2401 |
+
"model.layers.4.self_attn.o_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2402 |
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
| 2403 |
+
"model.layers.4.self_attn.o_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2404 |
"model.layers.4.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2405 |
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
| 2406 |
"model.layers.4.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2487 |
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
| 2488 |
"model.layers.5.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2489 |
"model.layers.5.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
| 2490 |
+
"model.layers.5.self_attn.o_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2491 |
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
| 2492 |
+
"model.layers.5.self_attn.o_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2493 |
"model.layers.5.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2494 |
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
| 2495 |
"model.layers.5.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2539 |
"model.layers.6.block_sparse_moe.experts.4.w2.input_scale": "model-00002-of-00010.safetensors",
|
| 2540 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00010.safetensors",
|
| 2541 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight_scale": "model-00002-of-00010.safetensors",
|
| 2542 |
+
"model.layers.6.block_sparse_moe.experts.4.w3.input_scale": "model-00002-of-00010.safetensors",
|
| 2543 |
+
"model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors",
|
| 2544 |
+
"model.layers.6.block_sparse_moe.experts.4.w3.weight_scale": "model-00002-of-00010.safetensors",
|
| 2545 |
+
"model.layers.6.block_sparse_moe.experts.5.w1.input_scale": "model-00002-of-00010.safetensors",
|
| 2546 |
+
"model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00010.safetensors",
|
| 2547 |
+
"model.layers.6.block_sparse_moe.experts.5.w1.weight_scale": "model-00002-of-00010.safetensors",
|
| 2548 |
"model.layers.6.block_sparse_moe.experts.5.w2.input_scale": "model-00003-of-00010.safetensors",
|
| 2549 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00010.safetensors",
|
| 2550 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 2576 |
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
| 2577 |
"model.layers.6.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2578 |
"model.layers.6.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
| 2579 |
+
"model.layers.6.self_attn.o_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2580 |
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
| 2581 |
+
"model.layers.6.self_attn.o_proj.weight_scale": "model-00002-of-00010.safetensors",
|
| 2582 |
"model.layers.6.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
| 2583 |
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
| 2584 |
"model.layers.6.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
|
| 2665 |
"model.layers.7.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 2666 |
"model.layers.7.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2667 |
"model.layers.7.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
| 2668 |
+
"model.layers.7.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2669 |
"model.layers.7.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
| 2670 |
+
"model.layers.7.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2671 |
"model.layers.7.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2672 |
"model.layers.7.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 2673 |
"model.layers.7.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 2754 |
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 2755 |
"model.layers.8.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2756 |
"model.layers.8.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
| 2757 |
+
"model.layers.8.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2758 |
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
| 2759 |
+
"model.layers.8.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2760 |
"model.layers.8.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2761 |
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 2762 |
"model.layers.8.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
|
| 2843 |
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
| 2844 |
"model.layers.9.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2845 |
"model.layers.9.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
| 2846 |
+
"model.layers.9.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2847 |
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
| 2848 |
+
"model.layers.9.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
| 2849 |
"model.layers.9.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
| 2850 |
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
| 2851 |
"model.layers.9.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
tokenizer_config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
"add_eos_token": false,
|
| 4 |
-
"add_prefix_space":
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
| 7 |
"content": "<unk>",
|
|
@@ -30,10 +30,10 @@
|
|
| 30 |
},
|
| 31 |
"additional_special_tokens": [],
|
| 32 |
"bos_token": "<s>",
|
| 33 |
-
"chat_template": "{
|
| 34 |
"clean_up_tokenization_spaces": false,
|
| 35 |
"eos_token": "</s>",
|
| 36 |
-
"legacy":
|
| 37 |
"model_max_length": 1000000000000000019884624838656,
|
| 38 |
"pad_token": null,
|
| 39 |
"sp_model_kwargs": {},
|
|
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
| 7 |
"content": "<unk>",
|
|
|
|
| 30 |
},
|
| 31 |
"additional_special_tokens": [],
|
| 32 |
"bos_token": "<s>",
|
| 33 |
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
|
| 34 |
"clean_up_tokenization_spaces": false,
|
| 35 |
"eos_token": "</s>",
|
| 36 |
+
"legacy": true,
|
| 37 |
"model_max_length": 1000000000000000019884624838656,
|
| 38 |
"pad_token": null,
|
| 39 |
"sp_model_kwargs": {},
|