| { | |
| "metadata": { | |
| "ParamSize": 392, | |
| "ParamBytes": 558430208.0, | |
| "BitsPerParam": 3.6444754828825427 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41000960, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.q_weight", | |
| "shape": [ | |
| 49280, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41000960, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "606523ae3635513dfd6f17ee8295e81e" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30484480, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.q_scale", | |
| "shape": [ | |
| 49280, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5125120, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.wpe.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 5125120 | |
| }, | |
| { | |
| "name": "transformer.wpe.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 6829056 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7042048 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7046144 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7050240 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 8967168 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9206784 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9211392 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 10915328 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11128320 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11132416 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11136512 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11140608 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 17956352 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 18808320 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 18824704 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 25542144 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26381824 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26385920 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26390016 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26394112 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28311040 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 28550656 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 28555264 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30259200 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30472192 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30476288 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30480384 | |
| } | |
| ], | |
| "md5sum": "8ce048787f9afbc33ba261c2ad6151c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "1ab45180b7a59d08ff8d26dc9ffd8051" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "8e07bc57bf1f1940f02ec372702969af" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "6a28592dd26102827fa7a616df3397d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "f06f7a7f3d2f73a2a273265f78ba6843" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "a1951a7ba32dc40bb55c0877f64ea66a" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "f65c7d31ab02fa807d10b4019146e248" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "b212963b5c82c5c3c02cf96d37508fd2" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "a0675f5284e954f8974036cdad0049fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "cbe10c01e3fc3e1827997af59d861869" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "0dae6e43d6550288de886eceb7dfb53e" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "4b15f38ee7b07220edc1330d5a1ac1dc" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "8e1f308884d460271e4a169ee5902ba6" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "cddf375922759cb0e1ce296fc360285b" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31003648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 7569408 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 9486336 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 9725952 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 9730560 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 11434496 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11647488 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11651584 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 11655680 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 11659776 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 18475520 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 19327488 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 26061312 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26900992 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26905088 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 26909184 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 26913280 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 28830208 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 29069824 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 29074432 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 30778368 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30991360 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30995456 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30999552 | |
| } | |
| ], | |
| "md5sum": "4b195d379c0b42d77e987ed6e53bf7a6" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27027968, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 6815744 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 7667712 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 7684096 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 14401536 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15241216 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15245312 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 15249408 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1916928, | |
| "byteOffset": 15253504 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 239616, | |
| "byteOffset": 17170432 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 17410048 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1703936, | |
| "byteOffset": 17414656 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 212992, | |
| "byteOffset": 19118592 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19331584 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19335680 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19339776 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6815744, | |
| "byteOffset": 19343872 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 851968, | |
| "byteOffset": 26159616 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27011584 | |
| } | |
| ], | |
| "md5sum": "1fe7215bed7fabf9108de3cbe34937d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41000960, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 49280, | |
| 208 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41000960, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "606523ae3635513dfd6f17ee8295e81e" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 12694528, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 820 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6717440, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 205 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 839680, | |
| "byteOffset": 6717440 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7557120 | |
| }, | |
| { | |
| "name": "transformer.ln_f.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7561216 | |
| }, | |
| { | |
| "name": "transformer.ln_f.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 7565312 | |
| }, | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 49280, | |
| 52 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5125120, | |
| "byteOffset": 7569408 | |
| } | |
| ], | |
| "md5sum": "5fe41842755fedf4d128d8d6a69bf90c" | |
| } | |
| ] | |
| } |