{ "metadata": { "ParamSize": 146, "ParamBytes": 3422752768.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 49152, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "a259e04dd5ac68366d0270853fc6baa0" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "142b00822051f07c35c0297306732f48" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4cdf051c6d954220db78f14789eb9243" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 8192 } ], "md5sum": "8d623f48a653accb77cfae436fd0df5a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "15a17acdb21ca5266f0b35542c25fc0c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "361e19c8e4a80770ff6ccd63c47704e9" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e418d11051dacbf65518c0f728c11f77" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a51763f48f8e20769858c3bce34eeb7e" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f14cb56efe79d85c98c9f8c6df8f4e3e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "049eba8a73a15f6d39a296e687d24d1d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e187eb9be4ae5793e175f7920abc0f4c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "403c4b426ffd4f9eddf429e7298d4f36" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "02e6eda657d12ff2f02d34a36b4910ba" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "1ec7a96eeb0c2c8b32aa68c370851e5d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6694a4f83cdaa11619ef14ea5ddb3e94" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d55465755e112eddf6194b5cbe941f30" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7ee58e03244ec98675592a619a4712f7" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "da0a9f97d7bdf6a0ddddc378bdcb0c42" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3abff689ea70caf218a0cc0fcf672c1e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d55edaa50a35290566eedc5fc1f401e4" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2edf76daafe562f5c10616058c58a47b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8fec62b169f638c3d9ae042f194f0fd0" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "00e69a33d1c557ede0d69183450a1707" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "d27d3c72e5678c5621bf5770804a0b5c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7050ab00c6e2924acca7d91fdd19ab5b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "55429b972e8de6f6cb68ea14012bd15f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "251e12116f3f4c8480a710cdf57264a3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f9a6c1dffd0b9c3b98fe4d1848460207" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "04e9237f609321497557047d90ab9316" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9f7ea2e608b0696573f4ca3f5898cb5f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bab64fd3d4db995a0564c7eb7509dcf4" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d4b45409b45a338c10c2e1d9e4c13907" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9c6714e8cad4a4764f6904a65046bca8" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "627446641a00cb8a056fa4f107f5ee33" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3fee3063f6956bfef4c59018d149eafc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "009bf55dfe453da73fbe39cd694b27fa" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fc29b456c44817fc0c8c15384e56fc43" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "04937f32d4e969f6787ef0bf56aac323" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f35cbbdeaf8f04e22bd45d0e1821ba4a" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6bc15385fe9911a15daa5dcab12c5318" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "985436f845616fa88353fbbab318599a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e04d5ed0f18bda06740e1750730a8563" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8654458158b0b0b7dcc3b9a501bd516b" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "cc8b44dfb4291d38f618e690a03b0dd4" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5da44e3580810651e3ca86453a66289e" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0679620c0b95a61079dfe779a9b32b03" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cfb852b4681a0714865aa9927d0d4cbb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bbf70e6104bfae17a95995790f4b58de" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "df7431c02ed05db25c4dc0772dc0cc7d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3c8f1c16e84e0c212f02926265b69338" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "23bac0243265691f27163e18b6b8b9de" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f45ea17e274b1457fe96d16b9fb7a843" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0a8fc03151241e115d9f1ea41d4a5cdc" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "514579ed33a0c7548c4745e8b15efca2" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "13d802b61d0ef424539a18b470d12d8d" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4513e8c8ad8878fe714635aa20d21644" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "44a34baaf11c41475ea721ff2e3826d0" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85777dca1c0a19051e360d42d8f4c2c1" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7168a60708c4305d3749dee0db5b0874" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e8ffa4a7cd803a596c562114d97c1113" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cda4047891966d559763d59795e089b4" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "15c29db6b563475d715a448cf9abb852" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b27665bb70d6dbc97ee3946b5f67a974" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "8258de8c45a1904dd5c144ddacf94932" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "603d5a6e825c0e1dd7238bcf1d6d5a9a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d0677587e19d32f120bbb24b7e1c3346" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "08d590fd7aaf12ceebb89b32a3901da9" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b56db862d598ec7f29eed7cd9918cf6f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d7680d7619191bd632fa3b80c6c2000c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "310d44e7f1fa0b05a1861f7bf54ad1ba" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4414a7bc6d47cdf2a762f1bf6b45704d" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fc407cd06f2c4aee544ad2f3cad072d8" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f039e34665f461602cc853154a8c20d4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 } ], "md5sum": "7e64f6ee1bdaac20d6e8992df1310660" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "32d627f9928b05bd30fd6bd859ef5b68" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "1b59151fcaf44c9995af9880d8a15d96" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "101037c77c03764fcaf1a4f1bbd2a13c" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "577f019a0505d834c070922505c011b5" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2f880bdd28b6419920d338d464bee211" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6f1878dece9d888a3ebe83fa83fba105" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25186304, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16785408 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16789504 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16793600 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 } ], "md5sum": "978338a8729a5504c555f701ef532be4" } ] }