diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4279 @@ +{ + "metadata": { + "ParamSize": 392, + "ParamBytes": 558430208.0, + "BitsPerParam": 3.6444754828825427 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 41000960, + "records": [ + { + "name": "transformer.wte.q_weight", + "shape": [ + 49280, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41000960, + "byteOffset": 0 + } + ], + "md5sum": "606523ae3635513dfd6f17ee8295e81e" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 30484480, + "records": [ + { + "name": "transformer.wte.q_scale", + "shape": [ + 49280, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5125120, + "byteOffset": 0 + }, + { + "name": "transformer.wpe.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 5125120 + }, + { + "name": "transformer.wpe.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 6829056 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7042048 + }, + { + "name": "transformer.h.0.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7046144 + }, + { + "name": "transformer.h.0.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7050240 + }, + { + "name": "transformer.h.0.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 8967168 + }, + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9206784 + }, + { + "name": "transformer.h.0.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9211392 + }, + { + "name": "transformer.h.0.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 10915328 + }, + { + "name": "transformer.h.0.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11128320 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11132416 + }, + { + "name": "transformer.h.0.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11136512 + }, + { + "name": "transformer.h.0.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11140608 + }, + { + "name": "transformer.h.0.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 17956352 + }, + { + "name": "transformer.h.0.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18808320 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 18824704 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 25542144 + }, + { + "name": "transformer.h.0.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26381824 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26385920 + }, + { + "name": "transformer.h.1.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26390016 + }, + { + "name": "transformer.h.1.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26394112 + }, + { + "name": "transformer.h.1.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28311040 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 28550656 + }, + { + "name": "transformer.h.1.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 28555264 + }, + { + "name": "transformer.h.1.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30259200 + }, + { + "name": "transformer.h.1.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30472192 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30476288 + }, + { + "name": "transformer.h.1.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30480384 + } + ], + "md5sum": "8ce048787f9afbc33ba261c2ad6151c2" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.1.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.1.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.1.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.1.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.1.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.2.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.2.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.2.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.2.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.2.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.2.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.2.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.2.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.2.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.2.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "1ab45180b7a59d08ff8d26dc9ffd8051" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.2.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.2.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.3.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.3.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.3.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.3.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.3.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.3.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.3.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.3.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.3.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.3.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.3.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.4.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.4.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.4.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.4.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.4.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.4.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.4.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "8e07bc57bf1f1940f02ec372702969af" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.4.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.4.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.4.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.4.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.4.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.5.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.5.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.5.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.5.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.5.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.5.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.5.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.5.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.5.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.5.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "6a28592dd26102827fa7a616df3397d5" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.5.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.5.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.6.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.6.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.6.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.6.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.6.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.6.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.6.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.6.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.6.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.6.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.6.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.6.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.6.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.7.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.7.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.7.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.7.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.7.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.7.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.7.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "f06f7a7f3d2f73a2a273265f78ba6843" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.7.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.7.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.7.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.8.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.8.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.8.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.8.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.8.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.8.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.8.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.8.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.8.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.8.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "a1951a7ba32dc40bb55c0877f64ea66a" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.8.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.8.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.9.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.9.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.9.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.9.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.9.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.9.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.9.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.9.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.9.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.9.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.9.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.9.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.9.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.10.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.10.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.10.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.10.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.10.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.10.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.10.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "f65c7d31ab02fa807d10b4019146e248" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.10.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.10.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.10.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.10.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.10.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.11.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.11.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.11.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.11.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.11.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.11.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.11.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.11.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.11.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.11.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "b212963b5c82c5c3c02cf96d37508fd2" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.11.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.11.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.12.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.12.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.12.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.12.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.12.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.12.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.12.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.12.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.12.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.12.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.12.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.13.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.13.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.13.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.13.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.13.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.13.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.13.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "a0675f5284e954f8974036cdad0049fb" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.13.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.13.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.13.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.13.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.13.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.14.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.14.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.14.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.14.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.14.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.14.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.14.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.14.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.14.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.14.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "cbe10c01e3fc3e1827997af59d861869" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.14.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.14.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.15.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.15.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.15.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.15.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.15.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.15.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.15.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.15.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.15.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.15.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.15.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.16.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.16.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.16.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.16.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.16.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.16.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.16.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "0dae6e43d6550288de886eceb7dfb53e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.16.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.16.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.16.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.16.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.16.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.17.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.17.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.17.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.17.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.17.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.17.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.17.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.17.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.17.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.17.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "4b15f38ee7b07220edc1330d5a1ac1dc" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.17.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.17.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.18.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.18.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.18.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.18.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.18.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.18.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.18.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.18.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.18.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.18.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.18.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.19.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.19.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.19.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.19.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.19.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.19.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.19.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "8e1f308884d460271e4a169ee5902ba6" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.19.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.19.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.19.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.19.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.19.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.20.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.20.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.20.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.20.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.20.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.20.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.20.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.20.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.20.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.20.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "cddf375922759cb0e1ce296fc360285b" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31003648, + "records": [ + { + "name": "transformer.h.20.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.20.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.h.21.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "transformer.h.21.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 7569408 + }, + { + "name": "transformer.h.21.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 9486336 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 9725952 + }, + { + "name": "transformer.h.21.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 9730560 + }, + { + "name": "transformer.h.21.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 11434496 + }, + { + "name": "transformer.h.21.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11647488 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11651584 + }, + { + "name": "transformer.h.21.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11655680 + }, + { + "name": "transformer.h.21.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 11659776 + }, + { + "name": "transformer.h.21.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 18475520 + }, + { + "name": "transformer.h.21.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19327488 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 26061312 + }, + { + "name": "transformer.h.21.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26900992 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26905088 + }, + { + "name": "transformer.h.22.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26909184 + }, + { + "name": "transformer.h.22.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 26913280 + }, + { + "name": "transformer.h.22.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 28830208 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 29069824 + }, + { + "name": "transformer.h.22.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 29074432 + }, + { + "name": "transformer.h.22.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 30778368 + }, + { + "name": "transformer.h.22.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30991360 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30995456 + }, + { + "name": "transformer.h.22.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 30999552 + } + ], + "md5sum": "4b195d379c0b42d77e987ed6e53bf7a6" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 27027968, + "records": [ + { + "name": "transformer.h.22.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 6815744 + }, + { + "name": "transformer.h.22.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 7667712 + }, + { + "name": "transformer.h.22.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 7684096 + }, + { + "name": "transformer.h.22.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 14401536 + }, + { + "name": "transformer.h.22.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15241216 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15245312 + }, + { + "name": "transformer.h.23.ln_1.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15249408 + }, + { + "name": "transformer.h.23.attn.c_attn.q_weight", + "shape": [ + 2304, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1916928, + "byteOffset": 15253504 + }, + { + "name": "transformer.h.23.attn.c_attn.q_scale", + "shape": [ + 2304, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 239616, + "byteOffset": 17170432 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 2304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 17410048 + }, + { + "name": "transformer.h.23.attn.c_proj.q_weight", + "shape": [ + 2048, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1703936, + "byteOffset": 17414656 + }, + { + "name": "transformer.h.23.attn.c_proj.q_scale", + "shape": [ + 2048, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 212992, + "byteOffset": 19118592 + }, + { + "name": "transformer.h.23.attn.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19331584 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19335680 + }, + { + "name": "transformer.h.23.ln_2.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19339776 + }, + { + "name": "transformer.h.23.mlp.c_fc.q_weight", + "shape": [ + 8192, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6815744, + "byteOffset": 19343872 + }, + { + "name": "transformer.h.23.mlp.c_fc.q_scale", + "shape": [ + 8192, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 851968, + "byteOffset": 26159616 + }, + { + "name": "transformer.h.23.mlp.c_fc.bias", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 27011584 + } + ], + "md5sum": "1fe7215bed7fabf9108de3cbe34937d3" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 41000960, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 49280, + 208 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41000960, + "byteOffset": 0 + } + ], + "md5sum": "606523ae3635513dfd6f17ee8295e81e" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 12694528, + "records": [ + { + "name": "transformer.h.23.mlp.c_proj.q_weight", + "shape": [ + 2048, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6717440, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mlp.c_proj.q_scale", + "shape": [ + 2048, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 839680, + "byteOffset": 6717440 + }, + { + "name": "transformer.h.23.mlp.c_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7557120 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7561216 + }, + { + "name": "transformer.ln_f.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 7565312 + }, + { + "name": "lm_head.q_scale", + "shape": [ + 49280, + 52 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5125120, + "byteOffset": 7569408 + } + ], + "md5sum": "5fe41842755fedf4d128d8d6a69bf90c" + } + ] +} \ No newline at end of file