{ "metadata": { "ParamSize": 405, "ParamBytes": 7322112000.0, "BitsPerParam": 4.500366415925148 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 81960960, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81960960, "byteOffset": 0 } ], "md5sum": "5672d39d8bba483f01440030c1c28c80" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "22b919625bf1390bf00fb4f2f25f77e7" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "78026d010ffe9aefa384ba29d54c9653" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0f72350686ce8fc916d1539a4dfaaa17" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "967cf328bf845044138be22dcb65788a" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32896000, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10245120, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 10245120 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 10255360 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14679040 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14689280 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14699520 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19123200 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27970560 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 27980800 } ], "md5sum": "5609d0b6099d4d5c275397f4334189c5" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b43a99e363ec1d7fed4e9797c5d14e13" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "36430333cfa9a44e3d19e92d692f3e8b" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ae283724758af345972e9cbbc4f77f57" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "b10d7075ad3c656d180693562e7754f5" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "871762254f17f2ec3b4a865a76e118a9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a77308fd1ec642c9c6740ff924a39fc2" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a53e3d77a885ab5c6c6f64fa50fbc81b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "77b2bc2e8f445c4ee8eef9689253d181" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1d159b2bb29bcf0fddacfe56923f0ca7" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6f1410edbbb8ccfe64105a39e761eb79" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "162653b923eaecf6b8571b4ed558e4e1" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "7564c1e21c17fc0353db6cfc1783311e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "69324acff215a0d2fe49a9c58d4d3389" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f1d16319cd42ffda08f939265aaa4944" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "fd57bfba972b350a1a345fdf2b0c6ef5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "7ff46a7a81310f9a629491667b6e5991" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "cf852e83a3356041c4a9b2d12c41bdbd" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "567456d93db3aad67a00f15c0ed7cd90" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "8efd118cd27f43937b4477f8e0134879" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "4abd156b0480318da745c871c0267644" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "82645d9c67d8bcb78a1dacffe5cbc788" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3fb8db984165ccf0e497b83cd9ed6b04" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ca09c11c9d1354433a6f0f29c4dad1a7" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "909d57d844565fab2b2862f6b2f07214" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3ac627bda8c1fabf8235751cee59e0a8" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6441a1c2674a86de8eb7e547660a20f1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "dcfab6b76fcb4fc9e3c5c7899aa4d99f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "e8986588c0af5a050d2cad8f8bc83a25" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0a01a46b99d9254a91bfdd8c9d2b38c2" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9760903f14f417abe40fd33cec96df3c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "05f3aeb6b2e1205a6e9810d37dadb9ce" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "3f2a9834c934000f7ad4a8c11498cd31" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 81960960, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81960960, "byteOffset": 0 } ], "md5sum": "ef1e0338b9ae1ddfb0fa5f12ce540cb6" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d66a3fe9fe3ea5dfd0c343d922355421" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fe3d4996cbbf494b4a89e7172edc5022" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29434880, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10245120, "byteOffset": 14755840 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25000960 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 25011200 } ], "md5sum": "c6a65a41cb3a4ebfdcff7c2be7a749bd" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a23ff40fde75c469e9cb87cd9a773c63" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "54d2a750f9b9ac84eecc49868912ae4b" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "39a1157ade9cbb5525e263b5843b4cb0" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "d06616b44b954752f67c3d7f52fb2cd0" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b8afb3f3ca896babfff71d12a70367f9" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1ce439317dccd5305cddae4b7fc1bf88" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6152441835e020628f8d9cf1b30491f8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "3255b0902ae2bad71b45251670e552f4" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "be2b934e1ea870739c9fe52dbd5badcc" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2d69d6a8ae85c97acd4a2a4a64368b1f" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4be42e1247400de5f0f56aa551753674" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "30946abaf7ba8808f55016181ad53a86" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "64164a4e6b53d9c0a07659aa3c1ff881" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5a16f740e3c9bb902c0e3b0b4937e4d7" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "55da67fc1cb8bf5e1900aa65aabaaef8" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "37f3a111c1b5911d02114ad33e18030a" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "8ca4f2ba079b6d9b828c9b602134fee1" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "808a395a6bc79cc6b08fc2f464e50092" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "179c220b1535560da6c690d6c2bbd4c0" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "de69ebc634fa5c3f9410d942b0e4e49e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3722cb920c8dede73ac6cf17aa09d640" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2c6c50aa9160f25d3a6a38e0459e32c7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "98cf2dfe46dc4bbc75a022e230e2197d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "1b5bd9fa4400170d41a5e097e2a61a4f" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7c7631c7f49397309358dc1b57203977" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "08a00dc061883294956c566edfd8ce1f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "afcc7ca52c1639c911a1490435292d81" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33443840, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28518400 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 33433600 } ], "md5sum": "2778cbd4efbdd35197d7ae5119b2d138" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "856d783284a9f24c5f8b7b7df314c34a" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "cb9a3cfd748e8492059959902563ba3b" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "74988421d3dbf22ff69499f826822b93" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "06c69f367222b40a62289b2ec11ffa79" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ea34db8b7d5703785231b49f72271a29" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "481bf9f30616c4c15b105b1ffe93cb0a" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1cdc8a56b49c7c2144558a4fa8f47e96" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "2471429dee94ffa6beebe4593805657b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c4ecfd74cedb402eea700af6786f8baf" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d13551d17a2336fcd4a3f052a90f2121" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fa1d4147318150ff9d9b40576c0a20a5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "393b536c20555e45056c927ea11ba926" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ed7389e6a17d166aacc008e3ebd58b62" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "00db5a4503406f231e3fc3198f0e506c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8d1c4b844f8296d0fd995aebb78d3ab8" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "ab736d2efb25a23911d0e17b49430f53" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "751f3216e67fc929b95d207b13857467" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "174dc89952f805c506a648a38b9b71b8" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9e0859ab08b30662889b16ff5fabc5bb" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "a4fc46789f6c959c0a348749061709b1" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f4b827baf2344a44cc61394c76601ecd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "03fb011c2db57f9d4ee82056e7cbc65f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "122b98fa9b1b416b4d87f7624f96aa2b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "10a762aeaa6ebf8570871ca1eabbef87" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8613e3686aae9cd54287e78c247ce5ba" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "fee5e26e3857708f82f20f9a34e13ca8" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "03794d1ce7581fbcadb74fbd3dd2c81e" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "2e32b5098b1dfea08c69b3005d565cbd" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ccbb0b8a47751ca37a3ac32f17890cf8" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7684281a00fcb973de6f8c7f09b2c2be" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "953e8c5ba8408d6e61dce80d83308e4d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "9da117a6990b1af9dac345970ecb5442" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2032ae0aeccbb0fc0b7d1c32f7a9c47a" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ffec448030e7c996aec76252d134a87f" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0419d2f9f0d79e416f5ee3700b736fd7" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32460800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13281280 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26388480 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28037120 } ], "md5sum": "2035288d157aaed1536613c453140886" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b158d2636a27c5251aabc5230e91f88a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fec6760387fd267abc40088c0674c556" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2e130c57fd315d88c62ee999d8d8b5c0" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9d4933ab963023e32c382acb5d887125" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0bf2f2f9b4e598e8c0f651e9e899528e" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e4f3b2918a792e9919f6cb6392452f1b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8e40a6261071eb914bd2f095b0081129" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "aaf43a8d5332e065de4268fbeef6af53" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "68e0c7ddbb8b55a03927171d855d295e" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6f65ccc9e5a9937e27eeb91181ff4f37" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6d400ce867987fbabb0eb82e30a3bc1b" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "d147f0b4db0e4a45d0f37f9bd039f2fc" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "91c9042bfb8fbfac04a9d81df7f2b5be" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "84a0e3c871b2f79c76b5f3c2d19cd981" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a52650e183d230db42bef9e754910098" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "dfecf77f2d467843369991fc13895101" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b8a2048b724437c7848cd903e443fccc" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ffb51cde3d4f2107a764ec7f484f790d" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f1ddc1e8dca4bf21218ee3983719e5c1" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9edca3747c2457bb4f36f472f2006aa2" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "853e61a3bc745ee8bd1196d1189b7ffd" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "638d319849e820d845a2856276fa52a1" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cd172c8fe9d93d26c77fc248452e5402" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "07b4025820cd44c0f69b38f5d6846c51" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "48df12d6836a2e9e0ccb71371f88791f" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fbe1c5c26ee1c68498ee33f2f8e9f42b" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4cd5ba7f78d5544aeb683735a575c780" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "508d208e8f19b5818840b339b3d265fd" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "53c866a6f6e45455fc590636c1d17331" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "dcc8b35226abea45c6b38b692afac9ac" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "08d5fd4d26defaad8604e431987e1e67" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "1cf64c3ff1e771346f59503e74d62432" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a788df8560f9e03408835ca8bd36658a" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0400384b874822938ea8dc1414c3653c" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6686b95069367959515d6a4979f7e960" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "6f6cd83945a4a2f779f4b54e92b54c6b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b3f5c4146a12d67bb50d68aab3c3521b" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f552d990e5e5266a81b9dfa6d0d15608" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d3c7abe89da0dcb11f58dca13af68991" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "017a73c04e5613e750a3efae41e44cf3" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2df2807c2907a0b5088bad28b1e678a3" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d302ca3ba04450ee6bfb4e19b9a65fa4" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c17994017228fe68007c0bcf772287ce" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "d778e6f985b6aad4b3a736bfa8c558a4" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "70ed1686b924ac31610793366a1c3269" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0bf0f8d73fa7938b4773004e35a704be" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f23ef7532229e48e445afa699c08d319" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "42ff389de110b412fecc25d7f3349a5d" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c141e7204b6b7ac0e1b158aecc5e735c" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b2e3e09ff3acb8836f558d062bed16c4" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "709ffecb206028669db5247aa9217412" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "db78a2e710bb871e6b94b645b32050db" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "210357deef1ab2ef3dab1bba3ee1d3a5" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "51ff996a7dd73cd3a0a795cea7a7fa75" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 28518400, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 } ], "md5sum": "d71720238115cea38f8ca38c946681e9" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7283da7dddc35c4856365f38a713a80d" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 28508160, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8847360 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26869760 } ], "md5sum": "11b3d75f7e14677e37d860d798cb6670" } ] }