DeepSeek-R1-Distill-Qwen-14B-PLLM / ndarray-cache.json
Jeethu's picture
Add weights
e318e73
{
"metadata": {
"ParamSize": 533,
"ParamBytes": 7617046528.0,
"BitsPerParam": 4.125675919921857
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "22c65ae1f721841ddab1ac1e186efd29"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 31102976,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
40
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12165120,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 12165120
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 12179456
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 30529536
}
],
"md5sum": "0618355dbd331456ea7fa6977b66b12e"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e2f3561860d3faae58777de0861e969b"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e31891e8c978a504f371240ac378dc63"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ce655afd357923fb575d40bdbcd620fb"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f7c46816834e27172be048f43b233080"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "be3320a02e0e930449cade88d397da88"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "72a66ae80f8ea688867f56abde43c39a"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "804a77b9f352f5d83f2cc543f1066f82"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ac5ba9b31891bd205634dcecc4a58bc7"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4beaac19021c979ef4f1efa1e565f967"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "fb392453bc07ff120ba7c1cd83567e73"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a9f0ecae43f7dcee9b6fcfc719ebf7f4"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6fbf341d783070c52b346bc205ac8e58"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "775dfa575e23f441ea3c2bb5d6cda011"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2a4db422a9c3f11db1909e619eab71e0"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bf7b26c45074c04ad4842ea2917ec763"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "d0fa9bb08ea10bc0f06074646dca53fc"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "29eac0b7275687bc3840a03dd86c38be"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5ba1c3c150c022aeec9ca349cbf32587"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a7ca0e6cf66592d44f5f6742ef9a55ac"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "722b2f9618e74588875154f0a88dc2dd"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "88988f95a98c25c03d2b651b2ed0c397"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "669a110429f6397ef18b6f64102f4c26"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3280f155c36dfb5fb4d86e07c0b72055"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "81204b6cc1e2506998e7f4fb09acfcec"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "aa77dab0e5fc6d507c26ab3ba105ebcc"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "6316a8d84c1c1d6a9994cef28942248f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "797b666c83b4b24dff15e817fa6d2f87"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "af847832bc96b62a31ed5849330adc85"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f7de76f1a8a15d72beb6ddac27b3c525"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "77b0a17499d593431d69f5bbea08b851"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bd152476c169c5146db2edcee4a9fe72"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "005087dbbd4b6c7b1f70c71e4bb9a584"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "75d208ba64e51be47209a697aa8102e2"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "31a772347a9f093262d43c1e8ab81c3c"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1ad48d6d1551d13e02ed5327a3bfba60"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "762e0193f9f1f42834d0a31b4dfe6fe9"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "e8a2c3f9fc87a7f0b2ec4177ebdee1b2"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "edf5ae93a1f647fbefc516d6c33a23a4"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "29291e5fb612e41f1ecc34ba07b5132c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "24a941da72b9c7ca278eb464ea9900a4"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b76b4e6ebd15e0a6d031f987bf2d9aef"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "30a83d002b8ddb5129b85a5f03eba750"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "526e2cae873e0a741d2e9aad7f3a277d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5a442a25726b4a2e6c428fda980acc01"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d1435a718172e7e4e3d14a4003dc911c"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "d2e2da836be13608cca9bb2f4f784547"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "035065d7d49b593fd9091556acf71c91"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "90ab69b3e7cc660bb631903f8137442e"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f412ee05a1edf45516d95437e132b1ea"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "51b5c6d8b263a611fb9f9023968d8f3c"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2ba6e4676d555f0dfe9a08a02e466224"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1aa079acab766bd3270806bcc9f2faa4"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e12856713a8a281575e60b8111d3e763"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7fddd3b0cba9be0e94aaf8a976dafb49"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ee6d6a5e32dd1233ea261c206b22e3d3"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "16c4ddee1f21b4e952fd56097fa5485c"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "74dc24ebede98c0704f7e112ea98c9b9"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "64df2ef3ac740e00a9a525c7edc7432a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d98d4b2673e97944317219097295432b"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "99b42957e268c44c091ec4d4702f7ce7"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "661840615b77ea83684506a542e87dfa"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a5ffde18aca1c41faa549809d441af51"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "dec5ef03fe0e6bd7805dbf110720be28"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "19bc1b595cab42542936f04d67c305f3"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0ef9bedfdd795a93020093294c5e8775"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "8cea8d747f6b490f04788a04ebdcb1bb"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "ac16788dc9ab357f233d3223d4f5ef36"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "676a7109f9b848696498ff1828429400"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "69ca3c88c9a096322365a7344d8de73a"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "a8ec28b3131d5e238089942c30a72c36"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f49612c63c02939c54d8a9c313eba1f3"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0ef48c2dfd2751a3f32e82451a725742"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8e92ab5796addfd3210a13c982d8a7c9"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "85f586f8b60fb2c1ff845452bbd13dbd"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "44b7c1805169a72febe8d4360303d421"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "bf8090d2f8ced2d3a9c046b81083bf16"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "4efc187fb3b6614d5a3eefa5237fa9b2"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "31735a3f3ea475d13f954f437a67fcc6"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d34ab1c9072b27142318df7dfa99b642"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "b7e419d2d907f4c1ceb7dd0f5eec6be5"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0074b7d3922027a4bc5a931c61d61694"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5da33489dbc8e7853548fa83831dffc0"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "2af421186335f53bf3a9677c3dd594f5"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5d34f8458f8bf0946cff60e434bf4aee"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "76cfec405935927b36ad7808daaf5741"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "b3512f499e3af6a7b8c4f8c8bc1421bc"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "6acd5f07bb519c4aee627848f98f8f91"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6096090fc78b3638401c035fb9a0080b"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a32977808ae94043c6132a20c3de1c94"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "ed035a03ac5d59a263708cd09902eb20"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5249000330c047b02fccb8c97e25f414"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f347707d83e98fa3bee0b55c6a37845a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5db9ec31122924dc9fadd8ffcd2e5c5a"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "40273d3b4323687a00f272fdc81369a5"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "83a8977dd12ace5e0de1a18302be8f2f"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "5056828114ce8bca556710707f7f4103"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "5d71decdcac3825a66156454e715dde4"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "436ff85abb8739e0c65d4d8ae89ea227"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8b3a3db77e9852f4e6bfb25d8a427e60"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "80ef613d369bdf6ee52205889a7610d3"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "01d71fc6ab9d75250a86f9e524fabee4"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1a61feb70caece63875c759a457185df"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "1585454857e1c424da84a2d4965ff6f1"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "352c588f25b2390618c9e8141c5e7706"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2a9cf8a5a27cdc9231479c14b87dabdf"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "b84e191a83cbeae2fbfdd19934a3fd2d"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "4b54bfd015f3f53be89058e15d2aa9ea"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6d4a9064ed2e8af20349aca41df4f825"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0722558763332dcb6cbab9cd8928ac21"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "0dd4d5754c52cf1ab505d0e8ebb7a383"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4097d55c3c978bf3c8e980480d29e0ff"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7db80b1009763a53c7af17aa59ddbab2"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "28258931244abf68e2af19b1200131c1"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "979087974b1a80216cc437ac78a5df12"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "66bce1c66d10b501513f716dedd812a5"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "0d92cf53f001d816b7f6fcb8890de46f"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "d97afc04683748afc0f930920d07f1f5"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4c904a1bc53e291bd433417c7c480f22"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "eb30bf739e0e09f5821aec1bf094d0b3"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "8fb0b9895b87096385b149f4edbdbd5f"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "05bb6d58912f4957649bc23ae4fac4b4"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "db843488690392c0f0d449738e1d2549"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "17eaef0678fbaa40d0bd538aab2baa90"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d9df62b807d270eb747446976045a27b"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "47578a636aea5d571aba3f89802b37dd"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "627c362f243cf6f2db247656d5dd7511"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "1da7d1d9d19abef9d27673ce04a982a9"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "50a04f21d36f01be1905ee9f435773f6"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b0d6fe6d7851080e8e7fdb872b2f3059"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "7155a886b5a267e524416e176087afea"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bd9d5461fd66748c2a327c0c6c4bd811"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "89a28d58168abd8feaf190b9d4f8815c"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "729d7acab3cf6e5a32b1ed1fb19272f4"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d821f907ce965e4a8f6221dd479d70bd"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a9dad39a681c33b57a2dc5d5c65c5ce7"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.40.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "fc94481fa690ed55f76ad1fb5a012b30"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.41.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.41.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "07cce73b0c9f8eb4e7fc08c1d9234ad8"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9f27f63d6f2ae6803000071688db0f19"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "434dff62c58c1f08c9bd6b7667afb3b7"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.42.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.42.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "7c405a6ebf06cebd33b4b389fcf3e0a4"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "caeb2b8cf5b858f8a56ebefc1c64c25b"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bc12769046c34b71691be63781b6e252"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "1135f4405c4c7816da045d7f6690b337"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ec6df9006555ec888ed9233af4fae8a7"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b0af6b8ee131522d732b957793b11462"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.43.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "a4260ca640d4d7e225ad072b2a5e8b79"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.44.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.44.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "d8c0f722e782a935ffbd533259cf37cb"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "74978222cbbb50bbe8f8411322e574a7"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3086b2ed7be0fa1bc04ce686d77a32ee"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 22685696,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 3747840
},
{
"name": "model.layers.45.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 3762176
},
{
"name": "model.layers.45.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 22112256
}
],
"md5sum": "d3a6550f05741463f41de800391f5b1a"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "471d1faf63c6a076669475d7c0d45f3f"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3d16e73073aa5d832b7f3ad93fad676b"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "17689b1f05bafec3da6c01d8f0132a29"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e5df76975c492024251d4a2af552ce00"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f2ce246e39984b642ee00cda711fa365"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 13107200
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 13516800
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 15728640
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16834560
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 16844800
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 16855040
},
{
"name": "model.layers.46.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 16869376
},
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17442816
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 30550016
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 30959616
}
],
"md5sum": "d624aeaa903fe9d657649f8899627ebd"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 33171456,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1105920
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1116160
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1126400
},
{
"name": "model.layers.47.self_attn.c_attn.q_weight",
"shape": [
640,
7168
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 1140736
},
{
"name": "model.layers.47.self_attn.c_attn.q_scale",
"shape": [
40,
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 573440,
"byteOffset": 19490816
},
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
640,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20064256
}
],
"md5sum": "2307c89126234f2392f9550462723fd9"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
640,
27648
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0f9f107f1d4394d8be4c99358949a307"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
1728,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6a5db0e4a7e884ac4a3d11fa49dc1d0d"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
640,
152064
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "59f4bba5bee2591acef26b3e93dbc28b"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 15923200,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
40,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 0
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
40,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 409600
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
108,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 2621440
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3727360
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3737600
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 3747840
},
{
"name": "lm_head.q_scale",
"shape": [
40,
152064
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12165120,
"byteOffset": 3758080
}
],
"md5sum": "46e1889c18a0af153b76338dc8684eb4"
}
]
}