gemma1.1-7b / ndarray-cache.json
ruihanglai's picture
init
fffbad8
{
"metadata": {
"ParamSize": 170,
"ParamBytes": 17075361792.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1572864000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
256000,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864000,
"byteOffset": 0
}
],
"md5sum": "60ae32290a73af4d897daca869ced218"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "bbe91c0e9a7cc6b67a2ac8e433ad38ca"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "d6084ff7e8369ae1a3004ce30a5a3a8b"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "180aa61e6f9c9532539c4257fbb58992"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "f4addb4fe0430f3cdb3d2319bb44ba62"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "b7691ec6a77ca8625c98f97bd1635b06"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "160e7c76a13c050aeec492a2e2d72aab"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b1d4c02924ac0bac0c95681ffe5d7163"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "c49146f24474b26a7832f3ab4a83d39a"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "f01062ca986324737db0f17aaf5b99f2"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "484e852cde8ad23c2b0d8e5cfa5b5738"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4a09c9ce10ad9282efe2ac2b82221647"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "96f11c66cf3c5ca2e4a8521761f98e23"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "340727bd83ea2235d0074ec50233e934"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "5c8c43c7fafd195e9d744b9ef2fe7217"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1777bcd65b8d2726f1003d2630ce47c9"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "9bb0bb3b2b4befa28a13d43b5c2ba0e8"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "d75e37069c46ecdd74b80f8cc06cb6d1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "f61a8e4f3be662e511bf8d2cfa546a74"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fd2ad1fb42458ff412cf6ab0eacd07a9"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "a4a0bec88d38218586cc0fecfe96f4a5"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "abd666001603d179c6fe9fef7ca52b81"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "40daa99215dbc17975c3167e09ad31e5"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "71be785d02736b5922a6cae0c8d1a831"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "fb2f730b5d66c9cdbc798358bef79ebe"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5165b9b2823cb5bd00219adede68d307"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "0f3ad4fb9d4b1c11625a92ab26b15565"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "8857080ba55c0ca02c1bfbd0204a05f5"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b00a53f899e45d7276e99354143f1c61"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c310bc2010941c90ab5501059839d251"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "fb0a228dbb72498a441e98248a05f195"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "1bca1e158bbac017220d40d0d261992d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "a35cff8ebbb396c6b83d2a2259dd72c4"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a6e50ab55df754b9034fa46797bacb4f"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "f0572dda1a68d882525bcf76be6ca777"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "291457429b0e015a4bcf8807b7fe55ed"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "df565ab47cc6f4afd0f77c7f05631b6c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "978f92f008c4dd62822eef690eade5ea"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "f3d368b4cab5b1fc97723ec6a1821f77"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "898fa212a2a0c5d7dd6c6e8db2e1a71d"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "86fca1e31ddb2efa2ffd3876e106c10a"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "da402edecdd2e1a3c251ee0610a536e0"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "86a91af650313b154a76a83215b76df8"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "126bb57f096bf441e7f88808078630ad"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "87fbd15936e44077592e028134bc0376"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "00f38ed56e2d90c0227acd3acdcf0241"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "bbad0dd8e2e23dda60e4e061454e3abd"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "09118299bdefbae7d50ffa91c5b224d1"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "053d8c972995b8d83e61744e6e314c7f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "ca72202cff818ef742ac296bc2600926"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "3686942021d12436094f0d9b3ef7b837"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "8470ce2af9e62816b6f73efaa79bd720"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0c344d1f3c747dbad5db78df451ccf23"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "54b10013400455dd7cf82060c38bc4be"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "192a3d2fcf7d3c70d3dbc0a93737ca88"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "96d24ad55a29534d380ce2fd2e6c992c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2bd0598739a7587684fceeeb39086c6a"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a1dd21a5a45c4d270e8df09a1b934817"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "29670e1879eaa9af540508e6b3a38f2e"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "4d0e8592ac518ce1aea4e0a1bae33cfd"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "bd91da9cd9fd37fef6c0de05026c2034"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "12bc8d3ab3b5055f5f431c0971b91a78"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "520345638bf42df4cb55d8479ff770fe"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "ac934115b39eeb7519802192e1e103b9"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "63345fb1ae9f9b058b5883f41c33db14"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "dd8d733ee27a5e5dc6d41668d4d7cd6c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "f5654529a63557c8e4005374d6cf296d"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3f4abeb620ef7ec88e4b7849c5658975"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "bd9921891a8e96b69c9a169e0b19ad88"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "0c0627ce937b0edd3e98b0dc2b163c5a"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "6a6beccbe8824ac3abb1d1ce498ceb9c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f50c090dd54c9b05b0057e1b7afcdfd1"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "f0725fb767165df3f963fce9f3db89d0"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "873b23b9460ae71eddf36a532499c553"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2e49ca76bb62f6fe4a00a5c8ee53dae7"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c5af0bf1da75e442102bb2ab9e2b5fdb"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "09ec40932166e5700f99f736b1576724"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "aa670b96411b8d184f4545b37acaa097"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "760fcb364bc239c2e77c39b96a9a7466"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "977188d5ec51682ebc6f673487072a99"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "90770a8cde2bea125b1cd9b51e333131"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "69db499e0e83a0aeb51c212848da685f"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3c3bd1e3cc7b60660caf1d364538073e"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "14e408e470361728f075ca6e96d812ad"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "91252eb5ac7e8ff526b6a6539373e5ee"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "385639a8f299bf0e1086cf111c79feca"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "e25ade1d0ba5a8f660c1ff40aa876312"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "93afa47a95a2a34d6209880dfdcf2713"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "20d81e0b42f15f8cbe66b37be4ce7a62"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "d3f4a3ac0c3270e6a18d9c69cc8bbc19"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "f41aa07eb326a16bd1d4f25ca61372f3"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6757f617b416676c1ac978082017a06f"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "1daf7b3195a362019245b4aaac97afa3"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "dd1649b386d47e14b297af0eef72d212"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "e2c261a370277bf7345af16ae56ce3dd"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d0237722ad61da671a535e24681ac224"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "17032abea04fc43daf6864142a4fd24c"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2c661f393440ce8b8e3089da0b74df90"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "ae565af5cc648c6d29d43cf53253ea8b"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "2fb95a887ab3b66c052a264e3fe7711f"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "918eb03c29d4fccaa534f6396161eb69"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "9c2be4c01241aca2b838e678f6a4602e"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "5b05018af280f877ac02b185c02a8335"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b60134b8d6353407694018e6561c309e"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "1e725304f8124408742d3488dfd09f3b"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "ca7dd5e249a888b895aa3e19e01cf5ae"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3b58c08fd5f34ba34ffb26f67561fab7"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0236d2e8b4ca91382c411d3618a26be8"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
3072,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "86a1cb3d42ed8c7cb9c91a923ed048ec"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 301989888,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
49152,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 301989888,
"byteOffset": 0
}
],
"md5sum": "7f26d02cec6df2d416595c0227777d67"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.weight",
"shape": [
12288,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "831436910bc5f74746081ba7de504bb9"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ab01465acddcdcbfc3f42866c1178acb"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25516032,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 6144
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
3072,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 12288
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25178112
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25184256
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25190400
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25196544
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25202688
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25208832
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25214976
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25221120
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25227264
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25233408
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25239552
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25245696
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25251840
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25257984
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25264128
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25270272
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25276416
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25282560
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25288704
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25294848
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25300992
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25307136
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25313280
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25319424
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25325568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25331712
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25337856
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25344000
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25350144
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25356288
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25362432
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25368576
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25374720
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25380864
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25387008
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25393152
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25399296
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25405440
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25411584
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25417728
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25423872
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25430016
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25436160
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25442304
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25448448
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25454592
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25460736
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25466880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25473024
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25479168
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25485312
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25491456
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25497600
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25503744
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25509888
}
],
"md5sum": "f3ad81e901d1821a5626ed9c99f5e4a1"
}
]
}