{ "metadata": { "ParamSize": 302, "ParamBytes": 8139319952.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 632496128, "records": [ { "name": "lm_head.weight", "shape": [ 77209, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 632496128, "byteOffset": 0 } ], "md5sum": "1ef40da1b2aa577312474c28b8e796a0" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 38050824, "records": [ { "name": "model.layers.19.mlp.down_proj.BLinear.weight", "shape": [ 1659, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38050824, "byteOffset": 0 } ], "md5sum": "f8457cbb1305d9b194147625ec96d141" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.20.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "de34f7540119bfbbb3272ba0fa450819" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "1183d93a8ab5ca02c182588fdab76093" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 21012480, "records": [ { "name": "model.layers.21.mlp.down_proj.ALinear.weight", "shape": [ 4096, 2565 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 21012480, "byteOffset": 0 } ], "md5sum": "dfdd9520664b7578e4dfa1ee9d13db02" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 58830840, "records": [ { "name": "model.layers.21.mlp.down_proj.BLinear.weight", "shape": [ 2565, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58830840, "byteOffset": 0 } ], "md5sum": "12ef7be1b51257c5b9ac1aabd8cc4a6f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 111606576, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 2433 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 111606576, "byteOffset": 0 } ], "md5sum": "5628ec23f9739c60110af69bb826809b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 19931136, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.BLinear.weight", "shape": [ 2433, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19931136, "byteOffset": 0 } ], "md5sum": "5ac55c91ab61a908a2a55ef6d2624ca5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32568320, "records": [ { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.20.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 24576 }, { "name": "model.layers.20.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 9912320 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21307392 }, { "name": "model.layers.20.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 21315584 }, { "name": "model.layers.20.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 22880256 }, { "name": "model.layers.20.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 232 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2494464, "byteOffset": 24249344 }, { "name": "model.layers.20.self_attn.qkv_proj.BLinear.weight", "shape": [ 232, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1900544, "byteOffset": 26743808 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28644352 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28652544 }, { "name": "model.layers.21.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 28660736 } ], "md5sum": "9d51095e773ebe2dbc3daebb9b613060" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 48440832, "records": [ { "name": "model.layers.22.mlp.down_proj.BLinear.weight", "shape": [ 2112, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48440832, "byteOffset": 0 } ], "md5sum": "a9ed21d814ad365edb42b8a39f3744bf" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 87707264, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87707264, "byteOffset": 0 } ], "md5sum": "96c8c0213d613251fc9f0ca6555e53e7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31735296, "records": [ { "name": "model.layers.21.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 3419136 }, { "name": "model.layers.21.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 9666048 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14425600 }, { "name": "model.layers.22.mlp.down_proj.ALinear.weight", "shape": [ 4096, 2112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17301504, "byteOffset": 14433792 } ], "md5sum": "ec4affbe343d5253f37852cf2ffa0fce" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29244928, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.BLinear.weight", "shape": [ 1912, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15663104, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15663104 }, { "name": "model.layers.22.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 15671296 }, { "name": "model.layers.22.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 19578880 }, { "name": "model.layers.22.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 22998016 } ], "md5sum": "a24d3185dd2dc2cb2a26745792bfa023" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.23.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "951cae63fc176523312853c49dafa6bd" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "4dfa691d7836b03effd92b6bb3e457e3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33385472, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4759552 }, { "name": "model.layers.23.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 4767744 }, { "name": "model.layers.23.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 14655488 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26050560 }, { "name": "model.layers.23.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 26058752 }, { "name": "model.layers.23.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 29966336 } ], "md5sum": "41d794e65f67e0ff7786cac18d1e1bbe" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.24.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "031b5ebb2824eb421d3b17b0749dfa18" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "5779c0986d4026ba997cf42c55cf250f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32305664, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 6246912 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11006464 }, { "name": "model.layers.24.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 11014656 }, { "name": "model.layers.24.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 20902400 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32297472 } ], "md5sum": "a57b7803d3f45f9f897f8d4ae680e44c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.25.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "6477e513723b9bedb0798ee311af8bcd" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "a26d20d224d5f7f0e52ba2ff40663047" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 23836160, "records": [ { "name": "model.layers.24.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 1564672 }, { "name": "model.layers.24.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 2933760 }, { "name": "model.layers.24.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 9180672 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13940224 }, { "name": "model.layers.25.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 13948416 } ], "md5sum": "21a33551d6e245a1f13954f23a4e61b1" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29744640, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11395072 }, { "name": "model.layers.25.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 11403264 }, { "name": "model.layers.25.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 15310848 }, { "name": "model.layers.25.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 18729984 }, { "name": "model.layers.25.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 24976896 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29736448 } ], "md5sum": "abbeca6629deda705e2d8b28ddb90fd3" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.26.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "843e100b8e95bffc69fef8677a1f2bd4" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "2322d65782d9a288a61e0de25f276ef4" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 30471680, "records": [ { "name": "model.layers.26.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 9887744 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21282816 }, { "name": "model.layers.26.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 21291008 }, { "name": "model.layers.26.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 22855680 }, { "name": "model.layers.26.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 24224768 } ], "md5sum": "312c4c7a7f666c9508a5b5d2ff5b2322" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.27.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "c9b16d8eee239b948e52483c71dd36f5" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "bc04288055221c0bd15af837087ff1b9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28992512, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4759552 }, { "name": "model.layers.27.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 4767744 }, { "name": "model.layers.27.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 14655488 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26050560 }, { "name": "model.layers.27.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 26058752 }, { "name": "model.layers.27.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 27623424 } ], "md5sum": "098737591bc6dc978dc113a4b078f3e0" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 77615104, "records": [ { "name": "model.layers.28.mlp.down_proj.BLinear.weight", "shape": [ 2707, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77615104, "byteOffset": 0 } ], "md5sum": "18d1756e4960c4c2a38127434ea784a4" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 113025024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.ALinear.weight", "shape": [ 28672, 1971 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 113025024, "byteOffset": 0 } ], "md5sum": "649e0277fa1bca1bd090c0e5c6068813" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33190400, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 6246912 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11006464 }, { "name": "model.layers.28.mlp.down_proj.ALinear.weight", "shape": [ 4096, 2707 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22175744, "byteOffset": 11014656 } ], "md5sum": "d5045dc220e00a8a20c8c5876cc815a3" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4e848a4b351e0214f5abb821f35a723d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "408c4819c06ae5ef716b258adcc7be9c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33044992, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.BLinear.weight", "shape": [ 1971, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16146432, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16146432 }, { "name": "model.layers.28.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 16154624 }, { "name": "model.layers.28.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 17719296 }, { "name": "model.layers.28.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 19088384 }, { "name": "model.layers.28.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 25335296 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30094848 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30103040 }, { "name": "model.layers.29.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 30111232 }, { "name": "model.layers.29.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 31675904 } ], "md5sum": "46f11a2a4c93a105d7e632c109d2f96d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "77eb835d42f2dcd4e9d498355bfef0e8" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "60cb74c45fe152e352fc6abfc8ff53a0" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18259968, "records": [ { "name": "model.layers.31.mlp.down_proj.ALinear.weight", "shape": [ 4096, 2229 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18259968, "byteOffset": 0 } ], "md5sum": "5192c6258a8e7288d066ee8bb2a513ea" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 63909888, "records": [ { "name": "model.layers.31.mlp.down_proj.BLinear.weight", "shape": [ 2229, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63909888, "byteOffset": 0 } ], "md5sum": "84bbf8a9c002f4ce2f70f9ec7a2d18b1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "467a313cf6da3a3b13de62551c43800b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32306176, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 6246912 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11006464 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11014656 }, { "name": "model.layers.30.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 11022848 }, { "name": "model.layers.30.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 12587520 }, { "name": "model.layers.30.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 13956608 }, { "name": "model.layers.30.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 20203520 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24963072 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24971264 }, { "name": "model.layers.31.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 24979456 }, { "name": "model.layers.31.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 28887040 } ], "md5sum": "db01746957093116efa641a28390a77e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 632496128, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 77209, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 632496128, "byteOffset": 0 } ], "md5sum": "600bedf25cd1c194c80b945099b42cec" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4992c39a3eef29c99e4f393f652d79a2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 143818752, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.ALinear.weight", "shape": [ 28672, 2508 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 143818752, "byteOffset": 0 } ], "md5sum": "d508b3269dce3180a74da519b1bc632c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31576576, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 6246912 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11006464 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11014656 }, { "name": "model.layers.0.mlp.gate_up_proj.BLinear.weight", "shape": [ 2508, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20545536, "byteOffset": 11022848 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31568384 } ], "md5sum": "d68cacb27700b70f639d7a714876e06f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a5ab77adc523f4b9b5eed929212d4678" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 143818752, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.ALinear.weight", "shape": [ 28672, 2508 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 143818752, "byteOffset": 0 } ], "md5sum": "71c9ec1aed0f50868c5fe461d27bfead" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 20545536, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.BLinear.weight", "shape": [ 2508, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20545536, "byteOffset": 0 } ], "md5sum": "3209471d6bbafcbbf8bacf3528eb255f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33257984, "records": [ { "name": "model.layers.0.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 764 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6258688, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.BLinear.weight", "shape": [ 764, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5476352, "byteOffset": 6258688 }, { "name": "model.layers.0.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 11735040 }, { "name": "model.layers.0.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 21723648 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29334016 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29342208 }, { "name": "model.layers.1.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 29350400 } ], "md5sum": "f33728aad54650df21ddad8095e81f89" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 39862768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 869 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39862768, "byteOffset": 0 } ], "md5sum": "fba3c276187fcc8c295691d612d76385" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 30961488, "records": [ { "name": "model.layers.1.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 3419136 }, { "name": "model.layers.1.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 9666048 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14425600 }, { "name": "model.layers.10.mlp.down_proj.ALinear.weight", "shape": [ 4096, 302 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2473984, "byteOffset": 14433792 }, { "name": "model.layers.10.mlp.down_proj.BLinear.weight", "shape": [ 302, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6926672, "byteOffset": 16907776 }, { "name": "model.layers.10.mlp.gate_up_proj.BLinear.weight", "shape": [ 869, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7118848, "byteOffset": 23834448 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30953296 } ], "md5sum": "fb6efc8ba44e8fa05d7e5aa7215f7c85" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 27742032, "records": [ { "name": "model.layers.10.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 3907584 }, { "name": "model.layers.10.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 7326720 }, { "name": "model.layers.10.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 13573632 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18333184 }, { "name": "model.layers.11.mlp.down_proj.ALinear.weight", "shape": [ 4096, 302 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2473984, "byteOffset": 18341376 }, { "name": "model.layers.11.mlp.down_proj.BLinear.weight", "shape": [ 302, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6926672, "byteOffset": 20815360 } ], "md5sum": "f09409a839ff59bd1d9b4d86f28582b0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 32396096, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 348 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15963456, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.BLinear.weight", "shape": [ 348, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2850816, "byteOffset": 15963456 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18814272 }, { "name": "model.layers.11.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 18822464 }, { "name": "model.layers.11.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 22730048 }, { "name": "model.layers.11.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 26149184 } ], "md5sum": "d3315ab98ad9ac8fe41f5473a8fc95ea" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.12.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "a0ed3e59ecc30e0246e7526f2ca9266d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "deb25de01382e531ea3bfe0a54afc093" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33385472, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4759552 }, { "name": "model.layers.12.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 4767744 }, { "name": "model.layers.12.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 14655488 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26050560 }, { "name": "model.layers.12.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 26058752 }, { "name": "model.layers.12.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 29966336 } ], "md5sum": "5432f37bc66d3a22d69d845abec551b4" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 93945856, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4096, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 93945856, "byteOffset": 0 } ], "md5sum": "d88d41424d140a27df3b8a19eb9add1b" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 187891712, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 22936, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 187891712, "byteOffset": 0 } ], "md5sum": "e4bd6d31019991e240553914f6cb76e4" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 93945856, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4096, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 93945856, "byteOffset": 0 } ], "md5sum": "c6271de9fac5524c47243758c276d00c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 187891712, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 22936, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 187891712, "byteOffset": 0 } ], "md5sum": "9a6d3c183644171a213247e30238e9e9" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31238144, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 6246912 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11006464 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11014656 }, { "name": "model.layers.13.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 191 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1564672, "byteOffset": 11022848 }, { "name": "model.layers.13.self_attn.o_proj.BLinear.weight", "shape": [ 191, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1369088, "byteOffset": 12587520 }, { "name": "model.layers.13.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 13956608 }, { "name": "model.layers.13.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 20203520 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24963072 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24971264 }, { "name": "model.layers.14.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 764 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6258688, "byteOffset": 24979456 } ], "md5sum": "e52b272b0bcbaac396ea7ef413245bb9" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 23083520, "records": [ { "name": "model.layers.14.self_attn.o_proj.BLinear.weight", "shape": [ 764, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5476352, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 5476352 }, { "name": "model.layers.14.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 15464960 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23075328 } ], "md5sum": "6fb971b09e67d2ed41219f0bfdf8177d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 38050824, "records": [ { "name": "model.layers.15.mlp.down_proj.BLinear.weight", "shape": [ 1659, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38050824, "byteOffset": 0 } ], "md5sum": "95969ae95921fd619ca7f9072f69d8d3" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 39862768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 869 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39862768, "byteOffset": 0 } ], "md5sum": "2fcf9c169a567c74af5b8327d9a18dc5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32452608, "records": [ { "name": "model.layers.15.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1659 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13590528, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.BLinear.weight", "shape": [ 869, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7118848, "byteOffset": 13590528 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20709376 }, { "name": "model.layers.15.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 764 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6258688, "byteOffset": 20717568 }, { "name": "model.layers.15.self_attn.o_proj.BLinear.weight", "shape": [ 764, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5476352, "byteOffset": 26976256 } ], "md5sum": "6f7fa98aadd46f215394c800374994ca" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 93945856, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 4096, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 93945856, "byteOffset": 0 } ], "md5sum": "fb7f6ccba0b8f079bffa5382dc31882a" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 87707264, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87707264, "byteOffset": 0 } ], "md5sum": "08f29edc6107efbfb66b7f9380cd6543" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 24218624, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 1278 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13741056, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.BLinear.weight", "shape": [ 1278, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10469376, "byteOffset": 13741056 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24210432 } ], "md5sum": "2dea23a6cb7c71c6b76550bfbb48bf9f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32986624, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.BLinear.weight", "shape": [ 1912, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15663104, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15663104 }, { "name": "model.layers.16.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 15671296 }, { "name": "model.layers.16.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 19578880 }, { "name": "model.layers.16.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 22998016 } ], "md5sum": "fbcdde3ca5d607359d1fe98470fad998" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 93945856, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 4096, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 93945856, "byteOffset": 0 } ], "md5sum": "d69995a449c0752e2784fdfee0c5380a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 87707264, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87707264, "byteOffset": 0 } ], "md5sum": "10e79acb6908e3982bafe97339bd3422" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 30616576, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7610368 }, { "name": "model.layers.17.mlp.gate_up_proj.BLinear.weight", "shape": [ 1912, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15663104, "byteOffset": 7618560 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23281664 }, { "name": "model.layers.17.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 23289856 }, { "name": "model.layers.17.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 27197440 } ], "md5sum": "f805bc1ce24ece6e15f1b7410b1a8491" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 93945856, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 4096, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 93945856, "byteOffset": 0 } ], "md5sum": "d9be2f3368b8920e1b609d10aa6860fc" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 187891712, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 22936, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 187891712, "byteOffset": 0 } ], "md5sum": "7af58ac079c0951dd29cc64303219f7d" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 24942080, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 9988608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17598976 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17607168 }, { "name": "model.layers.18.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 17615360 }, { "name": "model.layers.18.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 21522944 } ], "md5sum": "04dae9d0020b62e94edeb021cdd382c8" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 87707264, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87707264, "byteOffset": 0 } ], "md5sum": "b4143dea64b9d9bc87b3858831bafc60" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 31189504, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 9988608 }, { "name": "model.layers.19.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1659 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13590528, "byteOffset": 17598976 } ], "md5sum": "c80ba1ab6db44f4eb05ce88d763bfd4d" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29236736, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.BLinear.weight", "shape": [ 1912, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15663104, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 15663104 }, { "name": "model.layers.19.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 19570688 }, { "name": "model.layers.19.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 22989824 } ], "md5sum": "fdafe2b254de9bc1e32b5ff57d54d365" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "80032152367622ce623ecfb7b1a3460b" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b0ec05c5e6b5b8b39b86be4a8331180e" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25843712, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4759552 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4767744 }, { "name": "model.layers.2.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 4775936 }, { "name": "model.layers.2.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 8683520 }, { "name": "model.layers.2.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 1278 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13741056, "byteOffset": 12102656 } ], "md5sum": "a42c26da3fe59ad63b836b3dbfcb7ff4" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "234937419fce2e99d513173012511195" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5387dd28fe2c5cce5f9d92fc6f130073" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 22220800, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.BLinear.weight", "shape": [ 1278, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10469376, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10469376 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10477568 }, { "name": "model.layers.3.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 764 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6258688, "byteOffset": 10485760 }, { "name": "model.layers.3.self_attn.o_proj.BLinear.weight", "shape": [ 764, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5476352, "byteOffset": 16744448 } ], "md5sum": "301cd9454ccf300aa1903de2185e1403" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 17301504, "records": [ { "name": "model.layers.4.mlp.down_proj.ALinear.weight", "shape": [ 4096, 2112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17301504, "byteOffset": 0 } ], "md5sum": "25d3edfe5bc92af2121bd808b8740d41" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 48440832, "records": [ { "name": "model.layers.4.mlp.down_proj.BLinear.weight", "shape": [ 2112, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48440832, "byteOffset": 0 } ], "md5sum": "055ee0dd186acb48d5fcdd6a8855f680" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 111606576, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 2433 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 111606576, "byteOffset": 0 } ], "md5sum": "0244d8ee721aefc84dd17d1788f467ec" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 19931136, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.BLinear.weight", "shape": [ 2433, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19931136, "byteOffset": 0 } ], "md5sum": "3dc0b1e03b2ed077f081303a6e6ced58" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 31553536, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 1278 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13741056, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.BLinear.weight", "shape": [ 1278, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10469376, "byteOffset": 13741056 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24210432 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24218624 }, { "name": "model.layers.4.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 24226816 }, { "name": "model.layers.4.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 28134400 } ], "md5sum": "c60125672471349ef4b6cd32791d597b" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 38050824, "records": [ { "name": "model.layers.5.mlp.down_proj.BLinear.weight", "shape": [ 1659, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38050824, "byteOffset": 0 } ], "md5sum": "4ce2d28dcbf6a230c66345bff2c7b616" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 111606576, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 2433 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 111606576, "byteOffset": 0 } ], "md5sum": "c735fd9b1634c8857abf1b634df4ef68" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 19931136, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.BLinear.weight", "shape": [ 2433, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19931136, "byteOffset": 0 } ], "md5sum": "5f5eb8f596a436f6328b5e747bf7e2ab" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 31205888, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 9988608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17598976 }, { "name": "model.layers.5.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1659 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13590528, "byteOffset": 17607168 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31197696 } ], "md5sum": "57ea8f6c72464248e713c5b4a43ca9b2" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 93945856, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4096, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 93945856, "byteOffset": 0 } ], "md5sum": "d9d4ec6a18f93980af9e434cc22e020c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 187891712, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 22936, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 187891712, "byteOffset": 0 } ], "md5sum": "6f186492ba1609e1c7da523fe2061117" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 32268800, "records": [ { "name": "model.layers.5.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 3907584 }, { "name": "model.layers.5.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 929 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9988608, "byteOffset": 7326720 }, { "name": "model.layers.5.self_attn.qkv_proj.BLinear.weight", "shape": [ 929, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7610368, "byteOffset": 17315328 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24925696 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24933888 }, { "name": "model.layers.6.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 24942080 }, { "name": "model.layers.6.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 28849664 } ], "md5sum": "41b22d0b90e2b0b945d68c223e987573" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 27683752, "records": [ { "name": "model.layers.7.mlp.down_proj.BLinear.weight", "shape": [ 1207, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27683752, "byteOffset": 0 } ], "md5sum": "32bb35a980276ca678e1d889ead937a3" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 87707264, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87707264, "byteOffset": 0 } ], "md5sum": "b8e7e0ccb4f2fd87b42eefbfddf9f6c4" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 20902400, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 6246912 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11006464 }, { "name": "model.layers.7.mlp.down_proj.ALinear.weight", "shape": [ 4096, 1207 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9887744, "byteOffset": 11014656 } ], "md5sum": "3182d50ffab3cbb790027d625c6855dc" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29244928, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.BLinear.weight", "shape": [ 1912, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15663104, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15663104 }, { "name": "model.layers.7.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 15671296 }, { "name": "model.layers.7.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 19578880 }, { "name": "model.layers.7.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 22998016 } ], "md5sum": "e0d6237577970048e42164ea216b43d3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 63807952, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 1391 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 63807952, "byteOffset": 0 } ], "md5sum": "e12794d67055dd120758283aa5c3ea57" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 28238256, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4759552 }, { "name": "model.layers.8.mlp.down_proj.ALinear.weight", "shape": [ 4096, 754 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6176768, "byteOffset": 4767744 }, { "name": "model.layers.8.mlp.down_proj.BLinear.weight", "shape": [ 754, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17293744, "byteOffset": 10944512 } ], "md5sum": "f97d8def2f32ee98db97f8a4ce3d899a" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32218624, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.BLinear.weight", "shape": [ 1391, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11395072, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11395072 }, { "name": "model.layers.8.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 11403264 }, { "name": "model.layers.8.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 15310848 }, { "name": "model.layers.8.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 18729984 }, { "name": "model.layers.8.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 24976896 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29736448 }, { "name": "model.layers.9.mlp.down_proj.ALinear.weight", "shape": [ 4096, 302 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2473984, "byteOffset": 29744640 } ], "md5sum": "706209c3e57042750a3db20d8de6a8ec" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 39862768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.ALinear.weight", "shape": [ 22936, 869 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39862768, "byteOffset": 0 } ], "md5sum": "293a887c1f02954435b86eef13e268d4" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 32386896, "records": [ { "name": "model.layers.9.mlp.down_proj.BLinear.weight", "shape": [ 302, 11468 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6926672, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.BLinear.weight", "shape": [ 869, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7118848, "byteOffset": 6926672 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14045520 }, { "name": "model.layers.9.self_attn.o_proj.ALinear.weight", "shape": [ 4096, 477 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3907584, "byteOffset": 14053712 }, { "name": "model.layers.9.self_attn.o_proj.BLinear.weight", "shape": [ 477, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3419136, "byteOffset": 17961296 }, { "name": "model.layers.9.self_attn.qkv_proj.ALinear.weight", "shape": [ 5376, 581 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6246912, "byteOffset": 21380432 }, { "name": "model.layers.9.self_attn.qkv_proj.BLinear.weight", "shape": [ 581, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4759552, "byteOffset": 27627344 } ], "md5sum": "48a8258dacdd7c636376665538dc8927" } ] }