diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4183 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 2388848640.0, + "BitsPerParam": 5.001410952042906 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "3aeeb3d7e9d8e033e2294aaa3617c930" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4c6eb27388de688ccd2fa3a8ee670b51" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 23470080, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6156288 + }, + { + "name": "transformer.h.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6162432 + }, + { + "name": "transformer.h.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 18745344 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 20318208 + }, + { + "name": "transformer.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23463936 + } + ], + "md5sum": "b79b4d227556e018376ea5d6fd622968" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e2b23df08fdde2a47ae85ad21aa558a0" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.21.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "725020e4e78739a8f6343cb415878cd0" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.22.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.22.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.22.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "e4ae28ecd247c29bbc4928f992d1bced" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "275662165301882e56bced0cfecdbab7" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.23.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.23.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f45f08bcf9df989fddf13aa89358f016" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ce8fcd47094a2b30452c24ce679cbe7c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.23.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "d09017dad54cf3f00e5e172b62477a0a" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.24.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.24.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.24.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.24.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "4c9676bece9b5e7b221a69336026a268" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0e72d98ffd8355dd7be0b1a4fcffa048" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.25.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.25.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "d09eeff14583901b180dc6eccb4ace12" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "99b504cdeea00e6028b34a65c94a9535" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.25.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c67c1651ca7d9ca3cdcff92dbf9d50d5" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.26.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.26.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.26.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "894a738901cf878c2c8494742c86099d" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b81cca171ee50bf0c5765e60cd249633" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.27.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.27.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "cfa61f772775738e0a0353ec164421c6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cc4e16d406507daff55b0fcf11c2be6d" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.27.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.28.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.28.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "99e55559e6cf21d7141bf69d453a6298" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.28.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.28.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.28.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.28.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "cf72032d2f29bab3fe90b58a0c58cb36" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "057b59306b4ad62ffd608fa7431df6c4" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.29.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.29.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.29.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "be5495faf43ff8ef7b41e2e1591b2f63" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "90dc59780c48c89ca310944617918b30" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.29.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.30.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.30.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "357d7ff9e326fb7afb1c19ebabb9437e" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.30.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.30.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.30.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.30.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "d650c22c656bd85695c90079e696df7d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "20370d2b717a8e485de07646ba0e31d7" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.31.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.31.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.31.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "ca3d3d45623ec5e503780e7e08b06241" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "transformer.embd.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "6151d64f2428a5c1d62203744ae7522f" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 22093824, + "records": [ + { + "name": "transformer.h.31.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.embd.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 22087680 + } + ], + "md5sum": "3f943d23a92f58ba96157b96389476b5" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4343c908c761bf45cd71ea2342de2e9d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.0.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.0.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "c2bd6ef56d875c4f0adf154cfe47c933" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cd7f902c8eeaa61096ce9f4b28851caf" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.0.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "281620d48c1b4b25171eac94add6e6a0" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.1.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.1.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.1.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "71c800a9c8f2688b5a09fab340b952ab" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "eb56232c87f198c03c89d18e5ef36b67" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.10.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.10.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f1697a0679ed6fa119d0f06c2d6afd9a" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1c21ba64c0480980df4feb90e466ba7c" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.10.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "357ddc41322e4e58c1500316ba5eff87" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.11.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.11.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.11.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "ca57db2f19189c4ee50f1b259502345c" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a0f302db7e317d5affa27c8f6c1ba903" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.12.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.12.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "7ccbda76aa15483e6a3f15eba7fc6a70" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "efa96e2bc14423470f06edd30dd8a1ee" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.12.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "4a5dc9c0b947885e1cf127639fbfe632" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.13.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.13.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.13.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "67903c92547a920e3a8160a591c51044" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "482cbcfc8565a260833ac2ef6f854dcf" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.14.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.14.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "6e5cdaaa46681ebee024b02fcc59d597" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "20f1425c7d6d4ebe6d38086e8014f8b5" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.14.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "ad9cff41d1e77691fe35d1e2d1cc288f" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.15.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.15.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.15.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "53c4c98a271b4528f433486363134787" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cb442181137be51100cc66942af5d750" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.16.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.16.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f1f65ff2578babdee1f79b9746f53777" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "43deb776cdcf11197e52daa5eaf65bb4" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.16.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "30e4627221d5cd27e21f27c419916cdc" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.17.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.17.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.17.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "958f964c7c041d88ee24eaea103f9f5b" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0916edaf606a71efca575bdab3eb7fed" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.18.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.18.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f384838842289a74a8033bbb4b0c6752" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "87402cfadbf5f167aff039e25e5bd8df" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.18.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "26e8a37ec5bffe7133a9bc27c1a78cb3" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.19.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.19.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.19.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "9298980b2969ff9185155c63f27c4833" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fe08c7935fa15b57efc3c4091e779770" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.2.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.2.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "b5de4b357981555a6ffb6b1508b80c3c" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "884ecdd4f22892d8bdcc791aa3cd6452" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.2.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "8b478873b579011c9503b8e1c18da89b" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 26548224, + "records": [ + { + "name": "transformer.h.20.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.20.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.20.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.21.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21233664 + }, + { + "name": "transformer.h.21.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 25952256 + }, + { + "name": "transformer.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26542080 + } + ], + "md5sum": "d868060e32452d6ad8de1939afa175a0" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c863795534140ccc964b6a6bb5477722" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.3.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.3.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "5ee6f1c5c478e7defad5adc986d203bd" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fd4abc21b279b19fa3e7021fb1bdd6d8" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.3.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "92cf09176b4af272ff88b4d770bebb49" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.4.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.4.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.4.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "f2576337c4f7e49b7f289cb52ca9691c" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4aad834b4bc23460843517ff71d6a03c" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.5.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.5.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "76b77a44a3819f7b935f5503820080c9" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "28d4502b269e7fdaef6d85e023fb8181" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.5.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "122540bf729688fc534acaaf84d3a3f8" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.6.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.6.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.6.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "0a15e33fd4087a189cc2284bfdfe05b2" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "552a946908bb2a8e68df3ad4124df411" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.7.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.7.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "39c5ce0bbd57ffc7293c1f21a4340678" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d394cec2b8266f047c085ca6b27a23de" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.7.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c702901239fe22e5e139aa2b315eb4ab" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.8.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.8.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.8.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "cea066ba9a628075f151bf691f5c0792" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "548f267eb43d829b7094b0544bc8e46e" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.9.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.9.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "91104658482ada6ec8851f5670890038" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 15925248, + "records": [ + { + "name": "transformer.h.9.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + } + ], + "md5sum": "acfd1a32ed7e0091ec650cbfc933b085" + } + ] +} \ No newline at end of file