Qwen2.5-1.5B-Instruct-q4f32_1-MLC / ndarray-cache-b16.json
riczhou's picture
Upload folder using huggingface_hub
2442d1f verified
raw
history blame
123 kB
{
"metadata": {
"ParamSize": 311,
"ParamBytes": 965310464.0,
"BitsPerParam": 5.002534272041053
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 116686848,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 116686848,
"byteOffset": 0
}
],
"md5sum": "ca87ee6d9e9e02df255ccda13ca7d0b8"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 22330368,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 14585856,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 14585856
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 14588928
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 21470208
}
],
"md5sum": "e45cdc4b6268bba59d20dea2c6c665f4"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "fb38d7aec9ec1775a91d4e0392478714"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "6415f18b1e2fc707b473eeb6befec4e4"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "747c912ff0f1d54074a5a341ff70148c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "d8123a0e022d23f1e641603e2e5586c3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "4c38f99bd037849287606e43d4e11a51"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "6d0c7b2bbba7ef80f251812254b444f3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "fa080a5b3fb7c1ef234d989987fde651"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "669db28255fca31a814ae855905ed0f7"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "f5ab5bfcb74cbe382199c1e7060e1c3f"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "10318207f6f4b6d9ef199836e46f3f70"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "c32ec6278bc6866241bd2d51a3a091fb"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "fd7caef839e155c9e4ca5a18bef4441d"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "5e72f159fa21f36f5bfed70eca5eb94d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "38e96ee5bf1b6cbf364aa2a5f24b07ee"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "bc8c2256b847142a4f0ad6c047eebeaa"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "fc22f670c70227eea8563a8ded8c3341"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "0be2ef5a862a11abcf73a9fc57cddf96"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "b4e95abfc4d172a3843cc87c44ad808a"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "513fdcf5d5835ac732d69054d2db47ea"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "ccd627fc2514679379dc81c17e3b6352"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "c3c9a5ff3319f3aa8128153f532785d6"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "6dcb78d1a4f5fff77ac6912ccb3a2bd2"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "828a73fe385a4105981b83167fb18380"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "ecaa8512189836955d961b814da37169"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "e7c6b97dfdb335781b2d1e5068b38a77"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "eb9ad13e3efc5cd413ce127ec187ffb1"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 26331136,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1536,
1120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6881280,
"byteOffset": 18589696
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1536,
280
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 860160,
"byteOffset": 25470976
}
],
"md5sum": "3b15bb64a88ccd2c1147db7471b87cd1"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 18589696,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
17920,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13762560,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
17920,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1720320,
"byteOffset": 13762560
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 15482880
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 15485952
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
2048,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15490048
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
2048,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 196608,
"byteOffset": 17062912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17259520
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 147456,
"byteOffset": 18439168
},
{
"name": "model.norm.weight",
"shape": [
1536
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3072,
"byteOffset": 18586624
}
],
"md5sum": "9f8a178c5794698ab1ec65f3a808a826"
}
]
}