{ "metadata": { "ParamSize": 405, "ParamBytes": 8136417280.0, "BitsPerParam": 5.000860280867739 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 81960960, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81960960, "byteOffset": 0 } ], "md5sum": "40bb94b868d5c5d1178782370217fc2a" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3a50633aa29e3574e346057a8d247fff" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a5057587038042a69b99533dc190867c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "701fd58bfb1bbbfdcc0713b2ca1f016e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f4f58d6a2bab6a86b20012fe6c10e834" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32896000, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10245120, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 10245120 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 10255360 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14679040 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14689280 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14699520 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19123200 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27970560 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 27980800 } ], "md5sum": "fb8fed55b32956fb0961e765a51f7d66" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c097987bc73eb1b20d061b9ab925ff89" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9ff5ba83e446bd25ad5a64675a3c388f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ef57ee584e72f211fe7fc179ff7d3e93" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "6dcaf4617cb613eb139f19b26be99b6b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9ae0129da1d975ede149da8816c37063" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9236d1f1cda6adcc8a239a4ad1446218" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "aa1cb50a672a2e2cdcc9fa5c873a960b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "fd3044cb5590f13dc720260c7f3ad145" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "49ff5b75617ba4290f6d5522bd9e9843" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4676ba38530e292c55813879c5735bbc" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "70fae7bbd7c8eac6ddb032a9b0a73e8e" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "2946de0639239bd7e01c9a18d51decaa" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "43b21bf0cca926a4f636f3b60fa07b77" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3673cf37d1a47e6827e31c9ef1549ba7" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2974696a3a852cd5679806f98b99610e" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "48785d75eb706e6e727d1a5a5fb6c079" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "140bf34c77f04cb3fd241d255cd39df0" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d05f968a20b6c5c69d7c9ed34f070424" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "31827ffec05585d2c50365ccb3cb6e07" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "24351968087a9f2e925ffcd7c276fa24" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c46674ce8941f201965f491b71f75221" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5086f85ac33d8f9ea04da92cbb71c378" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ff18512cb689421803efb74fcdb45c88" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "9062355d19a2068fe3ec61dfa2a46fb8" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c3b9f5f93954f7c06d54cce989520327" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4f93d31b35e4c667cbd189b0e5e0809f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "10c1f4c4f3a90d99a0b7f7340afb42f5" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "f4ad1254f9fb6829d6fc3702678b62e2" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9031c79b513fb4cb538b9f77c00f97df" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7ad3a2cf23f2c1e6eac83acb507afcb5" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0316f56fba1199017d9f77392cfc41e1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "6bd26e10057a4cdf04717f81f892ab63" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 81960960, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81960960, "byteOffset": 0 } ], "md5sum": "6d50578ed15d045b9e9337ce19dc9e13" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "88ad046b06caf1f099410da09e8c08ae" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aad31fe9daebc1a4333520dfeeac5e16" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29434880, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10245120, "byteOffset": 14755840 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25000960 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 25011200 } ], "md5sum": "a42c4c5722cceb56ca3843f23030d8d7" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a9ad960dbf9898a0110e5209d7f851a6" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d38e117d1bc54770b58a357150612cb0" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "627bdaf4e05bc259572cffb97c566bb3" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "c11b49c5523eec5f0362136acf70b693" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "464c47fed5e408d220b180840688e047" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a07015e2d274f11f7580ba57d61f7f58" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2157878fbf0371f9aa46fa719f3c2e44" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "f62cf06699e721f8cb947297e6d75038" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "4c166d3cd944ce212d82411cab33a3db" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "087cffa8993baed646f70835ed01d801" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "45240ef0c94bc57f26718d44aa71126b" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "ee90bd70c1631e2d29868f03953722fb" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b84556f79a653cbdd600a90e5423fb10" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e30f53f61db49c0d1a440c0d41da7993" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c53c8c09cc766a9d7cde5a247cc08e3c" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "79dd0ca11094669a17b98d96c46fadce" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "601cd4f39c7d319e721cda23723e2620" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "03fbff68c95e7196144807e2966e508f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "752f688683e96ed41649f9c51ec72b61" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "d019c1053289fd9469c5280e22b42f6a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "bac5ae6155936c667cbb8de62dbcdcc7" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "db610679cfc9eb8e97338aa7b2eab73a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b5fb2ae853db070480d09a76c3b4940d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "14d0821fb287e7c0c7062d3bbc2dd9df" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "334c5696712049dd939664f60d7be996" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6887659e58a51fd92d2f2d6710720596" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a18424ab79802d42849ead11f38b3f6a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33443840, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28518400 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 33433600 } ], "md5sum": "74d93b21554057fd3e7bd1b7992c4b28" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "33a18d549487ab491d51cd2bda311d90" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e0d330ff6cb42009b27750d18ffd3c1a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "082f67912ffabfc40a537058a7962dd7" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "0619bc62d8cef0393596e8bd0d744683" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "57564e6b80d99e3374019ffc1eab9fc9" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0f9e434aac95c0dc4ed772e0d2ce63e1" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5399a853abd238072e13f01176f1d0ea" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "0a43ad732c4d5af7f550a5635bc3ede4" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "38e9385d0a27ef2f521c16234cd452bc" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d3a704c16efe9e3778b3d877532789ac" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0af6740c6874dcd6110207a0383ab589" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "5263d086b262c7359f769d5b3466a8f1" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "690407769abc563de7f4f6cac28b99a4" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "eafa67f8b00506b3c7191c4bf1d0dbea" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8cda6e0f8b962d7265279096b513f676" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "be53125e71a33c77785c055b1b33c4c6" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4b5bc8bf706bbdf39078da737a71f65f" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "da5915ea6266b55dce05190532e75d7c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "48a330901b9566fddfe8aca05e00b84f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "6651c1f273e728fcc6e21d6b9f0f9f9d" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2b0501edbb8b7bcc34e48e50be013697" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c41dd59216d48bf28324717d4ed18e4d" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f4e5c660a6ea09bdd3ad7e1c3c0fd7cf" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "a97789b9d1eb9910b26c66da96e52345" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "336864eadc4727d89d69293f54a1bd43" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7103c835067d805630c9592e0e6afed4" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "36f30b74245e69f231a1c778e5bc82ec" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "d280ee37f355eb99aa1060f8e80c0364" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ba5732c2696fedb6180e6a50c1c7a719" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "666524f24f291e44615316e3ca0077c9" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c02092a6bd46e3e6584a1d7e2768ba1a" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "284149228851a73c9d25af5e5c5fafa8" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5a67e91cd564dfed3c3f739f429d36d7" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "be253bf57ad1829429f22f82d9f7b90b" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c6ab7db40620abed1b0196f1af9101a2" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32460800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13281280 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26388480 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28037120 } ], "md5sum": "ca16bc464857ff3e3639780f7b94a437" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "46afeaa52c94c849dacc18a1c32b0234" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "30455559f125674223798144795a3f05" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c89b0d442cabef6d970c6a34da4d3bd4" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "0a2edb147e51488636581ab848428dad" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "47a2489dbb149ca4c2d3e37abe87d858" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8cd44f849a47b67011a8db80c792baa0" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "20ff3539354f912a08b62c7d09087737" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "856fb77ffc906ee591962d6b3c8b2d26" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e7c0d37667791047a0e1162d5d7abb98" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1014209eaf8dae72d64db90cd230c639" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "20325c1d981fd3fe0b3231761bb2553e" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "f323226e3442e5bc6d457c935d3a79fe" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "424977f5a8781dd4e5e5b5efbebf1193" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "033e3ae12a200c5b7f5a452b1a3c16bd" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1cd454cb4b01ff79d86e137686ca888b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "b4d7f3cee6a3e5406377fbc1a96b2209" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c060478f84feb857bdab9bda1806081d" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "86fc2834eb763f0a6fdb854f251b59b7" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "66824c6c90e0fea23f2607613bd1383d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "d8618d51db4150b407f371462c3feb84" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5a076b796d0903c85f92898d56bcbd7d" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "04d08e2bfcb13ea2107d6ea76276b71c" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bcf1b18ccfdf5c6e66c42d84e2ccc40a" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "1fcd6509ba64190fadba3681ed4d30a9" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "62fd91aaba876c4b17f5e6069fa017bd" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "af49842244e3d2be80b9f5a7f3dc043b" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "18eba13dc533bedbc858cb55bb61d9f7" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "64e375896702b3e06a70417b7fddd5fc" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5d0e4f0e90ea81f4d1a9040918d998c3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ab4ea63e346ee3b78d509e6168778716" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ca75a163e624d0df0a34185e073f632d" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "0b9f531a42e72624eacc7c54c135773b" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "95f71cf7c8f3ac38eb909e1542a4df35" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a19f5d434b13a803d64e702cdc4554e3" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3f5c558d3910462c8b4829faf8e46a08" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "4566fbd33eceb6f2d9400c8dcf3908ba" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "147dd03595f6aae7d3a8209ddb968870" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0d99f153e89b23d4c08efbf4e2dac5f3" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "54d4144a95111e1be5ed71106a8d739e" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9590cb6e326bc1cd55ef8619d56dc004" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f35bcfd078601351aae3118424aaa010" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2fd5274bfec2e65b20dc8cde9019ac9f" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fbe4530b60c0c3bd42436840933754a9" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "cba14a78ec59e2081be29d14dc7eca1f" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c5d4a4123a65c63f988fb95981b04da4" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0946a846b05f95cb14d8266f1119778d" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "764c91a15eb8277216e8fe76364138a4" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "f55f2291debdca2bac369c64133b4880" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a73151c55f66df7e8c87b9343cae6db5" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "81193c1d76e7638ff5a9929702a86b02" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1f0c20a79f0d6c53c502aff20a24c7a7" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "895149fcba73736876c377d38c197257" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f5a26522addda67c92282e9fc7ace522" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "05e9d3e6c73fbe2b9137188b867bac92" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 28518400, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 } ], "md5sum": "8961854d44d8c0790d463c9827b9c85e" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e5fbc06bc743100012e6e3664964deba" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 28508160, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8847360 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26869760 } ], "md5sum": "4bdf82c8ad60b34a6f1e99d38ed78cc9" } ] }