diff --git a/mlc-chat-config.json b/mlc-chat-config.json index 5a09c5555279127a6412f6755e529783603d479f..31e916c136bc35708260e1f7a43e1d5921d21409 100644 --- a/mlc-chat-config.json +++ b/mlc-chat-config.json @@ -27,11 +27,11 @@ "attention_sink_size": -1, "tensor_parallel_shards": 1, "pipeline_parallel_stages": 1, - "temperature": 0.7, + "temperature": 1.0, "presence_penalty": 0.0, "frequency_penalty": 0.0, - "repetition_penalty": 1.1, - "top_p": 0.8, + "repetition_penalty": 1.0, + "top_p": 1.0, "tokenizer_files": [ "tokenizer.json", "vocab.json", @@ -74,10 +74,7 @@ "function_string": "", "use_function_calling": false }, - "pad_token_id": 151643, + "pad_token_id": 0, "bos_token_id": 151643, - "eos_token_id": [ - 151645, - 151643 - ] + "eos_token_id": 151643 } \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json index a33dba0fccc8197692921ef9f6ac0d3a468f0c85..0e4138312832e03a0a6a96dffe44c56f4d1773b7 100644 --- a/ndarray-cache.json +++ b/ndarray-cache.json @@ -2,7 +2,7 @@ "metadata": { "ParamSize": 198, "ParamBytes": 3087428608.0, - "BitsPerParam": 16.0 + "BitsPerParam": 13.89882147873375 }, "records": [ { @@ -22,7 +22,7 @@ "byteOffset": 0 } ], - "md5sum": "4004155c68c07080f39231f289b9c9d3" + "md5sum": "fac6ce8ba0d8ae438d10b5bb546c7cbd" }, { "dataPath": "params_shard_1.bin", @@ -41,64 +41,15 @@ "byteOffset": 0 } ], - "md5sum": "8d9c6137ab1639c5864a3e4f0d47a945" + "md5sum": "b0c672ae0f591da2d49f4dce650e72ae" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", - "nbytes": 27535360, - "records": [ - { - "name": "model.layers.0.input_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 0 - }, - { - "name": "model.layers.0.mlp.down_proj.weight", - "shape": [ - 1536, - 8960 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 27525120, - "byteOffset": 3072 - }, - { - "name": "model.layers.0.post_attention_layernorm.weight", - "shape": [ - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 27528192 - }, - { - "name": "model.layers.0.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 27531264 - } - ], - "md5sum": "9fd8f6531c7b68276a0e21513cf4d24c" - }, - { - "dataPath": "params_shard_3.bin", - "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.1.mlp.down_proj.weight", + "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -109,10 +60,10 @@ "byteOffset": 0 } ], - "md5sum": "db7e25bc89a29b9b7bbf6714baa3dc24" + "md5sum": "db1e935ff9f5b6ff5da80f8b6d1628ff" }, { - "dataPath": "params_shard_4.bin", + "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ @@ -128,15 +79,15 @@ "byteOffset": 0 } ], - "md5sum": "13cf30b3f20ad035712c136f18f7d38d" + "md5sum": "baee6aadc8be0f23324126937a4cf15b" }, { - "dataPath": "params_shard_5.bin", + "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.10.mlp.down_proj.weight", + "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -147,15 +98,15 @@ "byteOffset": 0 } ], - "md5sum": "dfd4e0fba5875fd1d7db311377b87fe5" + "md5sum": "9581d907a6edac4b6bc1c357d0d2343f" }, { - "dataPath": "params_shard_6.bin", + "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.10.mlp.gate_up_proj.weight", + "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -166,15 +117,15 @@ "byteOffset": 0 } ], - "md5sum": "d3c8c121be697e30d9da502d459908dc" + "md5sum": "a8b915076ac9a72930fb6871e62d46ef" }, { - "dataPath": "params_shard_7.bin", + "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.11.mlp.down_proj.weight", + "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -185,29 +136,10 @@ "byteOffset": 0 } ], - "md5sum": "c303778173a91bfdc754b414416c5160" - }, - { - "dataPath": "params_shard_8.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.11.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "ab163c3a0afabe864dc1dfaeae1a9908" + "md5sum": "972150741a85989c34825d9c970f6741" }, { - "dataPath": "params_shard_9.bin", + "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ @@ -223,45 +155,45 @@ "byteOffset": 0 }, { - "name": "model.layers.0.self_attn.o_proj.weight", + "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.1.input_layernorm.weight", + "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.1.post_attention_layernorm.weight", + "name": "model.layers.0.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.1.self_attn.c_attn.bias", + "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { "name": "model.layers.1.self_attn.c_attn.weight", @@ -275,48 +207,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.1.self_attn.o_proj.weight", + "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.10.input_layernorm.weight", + "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.10.post_attention_layernorm.weight", + "name": "model.layers.1.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.10.self_attn.c_attn.bias", + "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.10.self_attn.c_attn.weight", + "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -327,7 +259,17 @@ "byteOffset": 22040576 }, { - "name": "model.layers.10.self_attn.o_proj.weight", + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28332032 + }, + { + "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -335,48 +277,57 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 28332032 + "byteOffset": 28336128 }, { - "name": "model.layers.11.input_layernorm.weight", + "name": "model.layers.2.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33050624 + "byteOffset": 33054720 }, { - "name": "model.layers.11.post_attention_layernorm.weight", + "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 - }, + "byteOffset": 33057792 + } + ], + "md5sum": "cc217b23db4df89fcfed2ffa54ee328a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.11.self_attn.c_attn.bias", + "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "2e9b51ed2ac487ad15028dc401beeb7d" + "md5sum": "969f8fa80cdf5b272c2e607a2119c4e5" }, { - "dataPath": "params_shard_10.bin", + "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.12.mlp.down_proj.weight", + "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -387,15 +338,15 @@ "byteOffset": 0 } ], - "md5sum": "cc304778043000c0f442d99b072b593c" + "md5sum": "d860175d6e9380a30cfbe06262104ec7" }, { - "dataPath": "params_shard_11.bin", + "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.12.mlp.gate_up_proj.weight", + "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -406,15 +357,15 @@ "byteOffset": 0 } ], - "md5sum": "26700b8899f4d41d92f8439f3e0f1c28" + "md5sum": "3f5dad1ba616506c4e5084bb18bbf1d6" }, { - "dataPath": "params_shard_12.bin", + "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.13.mlp.down_proj.weight", + "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -425,15 +376,15 @@ "byteOffset": 0 } ], - "md5sum": "2ddac3acda693d7656dc82d1cb44a5fa" + "md5sum": "b993af8733b274d5e5aaacdc6462f74f" }, { - "dataPath": "params_shard_13.bin", + "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.13.mlp.gate_up_proj.weight", + "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -444,15 +395,15 @@ "byteOffset": 0 } ], - "md5sum": "00477d2aa802e741a31ae46a2af61519" + "md5sum": "2877a35f91a04b1c58f6a824e8d61e87" }, { - "dataPath": "params_shard_14.bin", + "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.14.mlp.down_proj.weight", + "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -463,34 +414,15 @@ "byteOffset": 0 } ], - "md5sum": "a0eb921170b36a016d3b8656caa53b8d" - }, - { - "dataPath": "params_shard_15.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.14.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "d80214adb8ccfa0343cf8adb0caec5a4" + "md5sum": "8bad654a3c485e170a6f0e5ddab81db0" }, { - "dataPath": "params_shard_16.bin", + "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.11.self_attn.c_attn.weight", + "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -501,48 +433,48 @@ "byteOffset": 0 }, { - "name": "model.layers.11.self_attn.o_proj.weight", + "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.12.input_layernorm.weight", + "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.12.post_attention_layernorm.weight", + "name": "model.layers.3.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.12.self_attn.c_attn.bias", + "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.12.self_attn.c_attn.weight", + "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -553,48 +485,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.12.self_attn.o_proj.weight", + "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.13.input_layernorm.weight", + "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.13.post_attention_layernorm.weight", + "name": "model.layers.4.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.13.self_attn.c_attn.bias", + "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.13.self_attn.c_attn.weight", + "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -605,75 +537,56 @@ "byteOffset": 22040576 }, { - "name": "model.layers.13.self_attn.o_proj.weight", + "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 28332032 }, { - "name": "model.layers.14.input_layernorm.weight", + "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 33050624 + "nbytes": 4718592, + "byteOffset": 28336128 }, { - "name": "model.layers.14.post_attention_layernorm.weight", + "name": "model.layers.5.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 + "byteOffset": 33054720 }, { - "name": "model.layers.14.self_attn.c_attn.bias", - "shape": [ - 2048 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 - } - ], - "md5sum": "836ad76f4bf6b3ea6142a57d73863eab" - }, - { - "dataPath": "params_shard_17.bin", - "format": "raw-shard", - "nbytes": 27525120, - "records": [ - { - "name": "model.layers.15.mlp.down_proj.weight", + "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ - 1536, - 8960 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 27525120, - "byteOffset": 0 + "nbytes": 3072, + "byteOffset": 33057792 } ], - "md5sum": "69c8ebf98b69e0a7bed86b1b095d4e58" + "md5sum": "9a67ac0bf8178e4054e781eb27f403ea" }, { - "dataPath": "params_shard_18.bin", + "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.15.mlp.gate_up_proj.weight", + "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -684,15 +597,15 @@ "byteOffset": 0 } ], - "md5sum": "bc5b5e85e1e1ca4a340dc0ae73d0cf70" + "md5sum": "ccd5502649f7168e48df2ccbaeeb1d9b" }, { - "dataPath": "params_shard_19.bin", + "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.16.mlp.down_proj.weight", + "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -703,15 +616,15 @@ "byteOffset": 0 } ], - "md5sum": "1f0d7a92b72e8c084a47173269fabe37" + "md5sum": "271cf1aad384c2d628d1aaf62a140a9c" }, { - "dataPath": "params_shard_20.bin", + "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.16.mlp.gate_up_proj.weight", + "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -722,15 +635,15 @@ "byteOffset": 0 } ], - "md5sum": "7868afb9efef145589b5d006eaa1d2cb" + "md5sum": "901c1912a0418111b928559f750e0d95" }, { - "dataPath": "params_shard_21.bin", + "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.17.mlp.down_proj.weight", + "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -741,15 +654,15 @@ "byteOffset": 0 } ], - "md5sum": "64dd36a133277b89ecb7b753fa45a053" + "md5sum": "379946453685b3ee0b55935c9538eecd" }, { - "dataPath": "params_shard_22.bin", + "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.17.mlp.gate_up_proj.weight", + "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -760,15 +673,34 @@ "byteOffset": 0 } ], - "md5sum": "9ed20f54aebad5411cce28791db1bd4f" + "md5sum": "56fa2871d8315eef920e5df9970ea9fe" }, { - "dataPath": "params_shard_23.bin", + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 1536, + 8960 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "817b9f5a7e095c5f05876994890301b4" + }, + { + "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.14.self_attn.c_attn.weight", + "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -779,48 +711,48 @@ "byteOffset": 0 }, { - "name": "model.layers.14.self_attn.o_proj.weight", + "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.15.input_layernorm.weight", + "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.15.post_attention_layernorm.weight", + "name": "model.layers.6.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.15.self_attn.c_attn.bias", + "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.15.self_attn.c_attn.weight", + "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -831,48 +763,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.15.self_attn.o_proj.weight", + "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.16.input_layernorm.weight", + "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.16.post_attention_layernorm.weight", + "name": "model.layers.7.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.16.self_attn.c_attn.bias", + "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.16.self_attn.c_attn.weight", + "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -883,7 +815,17 @@ "byteOffset": 22040576 }, { - "name": "model.layers.16.self_attn.o_proj.weight", + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28332032 + }, + { + "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -891,48 +833,57 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 28332032 + "byteOffset": 28336128 }, { - "name": "model.layers.17.input_layernorm.weight", + "name": "model.layers.8.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33050624 + "byteOffset": 33054720 }, { - "name": "model.layers.17.post_attention_layernorm.weight", + "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 - }, + "byteOffset": 33057792 + } + ], + "md5sum": "bcd5c8951311cb8debd10c7a9cbd2764" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.17.self_attn.c_attn.bias", + "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "d3ca8d87d2da5c44ba5209a2791736da" + "md5sum": "faf5cbdd3c2ff11883ce9f8554bcf498" }, { - "dataPath": "params_shard_24.bin", + "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.18.mlp.down_proj.weight", + "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -943,15 +894,15 @@ "byteOffset": 0 } ], - "md5sum": "3dd39a6d6ad6ed3ea7b684810336118a" + "md5sum": "17ae02bef8401386b5eb9f653b2f420a" }, { - "dataPath": "params_shard_25.bin", + "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.18.mlp.gate_up_proj.weight", + "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -962,15 +913,15 @@ "byteOffset": 0 } ], - "md5sum": "3a8f050e1c81d9c21bd78dc7df0d4bc4" + "md5sum": "69af3fefd0ad33870219cab220d98c1b" }, { - "dataPath": "params_shard_26.bin", + "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.19.mlp.down_proj.weight", + "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -981,15 +932,15 @@ "byteOffset": 0 } ], - "md5sum": "36ca71a16ca2834d3164b92c97a170ab" + "md5sum": "5862aaa3d0c9514ced28f0c391b70d8c" }, { - "dataPath": "params_shard_27.bin", + "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.19.mlp.gate_up_proj.weight", + "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1000,15 +951,15 @@ "byteOffset": 0 } ], - "md5sum": "9f22ca095f2021608fae9bc98136d252" + "md5sum": "221ad0e352c9a06fbd95fa8990829aac" }, { - "dataPath": "params_shard_28.bin", + "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.2.mlp.down_proj.weight", + "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1019,34 +970,15 @@ "byteOffset": 0 } ], - "md5sum": "fab4a0e7de9cca7058251ee57a4a9742" - }, - { - "dataPath": "params_shard_29.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.2.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "e6b441ea46b5d4ba144d0ede689da1a2" + "md5sum": "605ca260375f758e22d4265fffa356fc" }, { - "dataPath": "params_shard_30.bin", + "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.17.self_attn.c_attn.weight", + "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1057,48 +989,48 @@ "byteOffset": 0 }, { - "name": "model.layers.17.self_attn.o_proj.weight", + "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.18.input_layernorm.weight", + "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.18.post_attention_layernorm.weight", + "name": "model.layers.9.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.18.self_attn.c_attn.bias", + "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.18.self_attn.c_attn.weight", + "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1109,48 +1041,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.18.self_attn.o_proj.weight", + "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.19.input_layernorm.weight", + "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.19.post_attention_layernorm.weight", + "name": "model.layers.10.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.19.self_attn.c_attn.bias", + "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.19.self_attn.c_attn.weight", + "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1161,75 +1093,56 @@ "byteOffset": 22040576 }, { - "name": "model.layers.19.self_attn.o_proj.weight", + "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 28332032 }, { - "name": "model.layers.2.input_layernorm.weight", + "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 33050624 + "nbytes": 4718592, + "byteOffset": 28336128 }, { - "name": "model.layers.2.post_attention_layernorm.weight", + "name": "model.layers.11.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 + "byteOffset": 33054720 }, { - "name": "model.layers.2.self_attn.c_attn.bias", + "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 3072, + "byteOffset": 33057792 } ], - "md5sum": "ec32c77b7b448670b9fbb3e4dadd2b9f" + "md5sum": "03e434a9483fb2aaa19f4cab3e4fb083" }, { - "dataPath": "params_shard_31.bin", + "dataPath": "params_shard_29.bin", "format": "raw-shard", - "nbytes": 27525120, + "nbytes": 55050240, "records": [ { - "name": "model.layers.20.mlp.down_proj.weight", - "shape": [ - 1536, - 8960 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 27525120, - "byteOffset": 0 - } - ], - "md5sum": "169d2af7bb908d1d1319b2327ffd685d" - }, - { - "dataPath": "params_shard_32.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.20.mlp.gate_up_proj.weight", + "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1240,15 +1153,15 @@ "byteOffset": 0 } ], - "md5sum": "549d4ada0599a1d35bf7dd3f87dfad84" + "md5sum": "dc39bbf30e260c4170b068cc8553d7cd" }, { - "dataPath": "params_shard_33.bin", + "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.21.mlp.down_proj.weight", + "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1259,15 +1172,15 @@ "byteOffset": 0 } ], - "md5sum": "6ea484043810c0eeff8409f4fffc7c14" + "md5sum": "d6ebf5371ced23357eb54793db3a4afb" }, { - "dataPath": "params_shard_34.bin", + "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.21.mlp.gate_up_proj.weight", + "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1278,15 +1191,15 @@ "byteOffset": 0 } ], - "md5sum": "f09443ff1e071a170ee446e8b5c9e983" + "md5sum": "b23a6222ce16baad47b61fe41f8660b0" }, { - "dataPath": "params_shard_35.bin", + "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.22.mlp.down_proj.weight", + "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1297,15 +1210,15 @@ "byteOffset": 0 } ], - "md5sum": "367fc7511fd53ad92f83eff81ee49dc2" + "md5sum": "f579126381720332c477dfeb50701e20" }, { - "dataPath": "params_shard_36.bin", + "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.22.mlp.gate_up_proj.weight", + "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1316,15 +1229,34 @@ "byteOffset": 0 } ], - "md5sum": "0a8e85d059205dd0b12cbfd488dbcc1f" + "md5sum": "44b78e073ac8337e052b9744380ec3c7" }, { - "dataPath": "params_shard_37.bin", + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 1536, + 8960 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "d205aa713ee99dee09b571cfedc7cefd" + }, + { + "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.2.self_attn.c_attn.weight", + "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1335,48 +1267,48 @@ "byteOffset": 0 }, { - "name": "model.layers.2.self_attn.o_proj.weight", + "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.20.input_layernorm.weight", + "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.20.post_attention_layernorm.weight", + "name": "model.layers.12.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.20.self_attn.c_attn.bias", + "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.20.self_attn.c_attn.weight", + "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1387,48 +1319,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.20.self_attn.o_proj.weight", + "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.21.input_layernorm.weight", + "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.21.post_attention_layernorm.weight", + "name": "model.layers.13.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.21.self_attn.c_attn.bias", + "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.21.self_attn.c_attn.weight", + "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1439,7 +1371,17 @@ "byteOffset": 22040576 }, { - "name": "model.layers.21.self_attn.o_proj.weight", + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28332032 + }, + { + "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -1447,48 +1389,57 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 28332032 + "byteOffset": 28336128 }, { - "name": "model.layers.22.input_layernorm.weight", + "name": "model.layers.14.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33050624 + "byteOffset": 33054720 }, { - "name": "model.layers.22.post_attention_layernorm.weight", + "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 - }, + "byteOffset": 33057792 + } + ], + "md5sum": "54c052279cd7ffad4678b87826192f76" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.22.self_attn.c_attn.bias", + "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "670d9f64575bb07241d48a8283edc7c3" + "md5sum": "0dac5ce31c1f2eaa17a56d0da55f7b6d" }, { - "dataPath": "params_shard_38.bin", + "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.23.mlp.down_proj.weight", + "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1499,15 +1450,15 @@ "byteOffset": 0 } ], - "md5sum": "55dc4417efdd8592cf2596108dc49e03" + "md5sum": "6b9f7819701341646bc2a266108f95b2" }, { - "dataPath": "params_shard_39.bin", + "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.23.mlp.gate_up_proj.weight", + "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1518,15 +1469,15 @@ "byteOffset": 0 } ], - "md5sum": "59aae069c3cf2e5dfc148b92384dea3d" + "md5sum": "04cd76c5b6fc4ebe6baaa8644b6930fd" }, { - "dataPath": "params_shard_40.bin", + "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.24.mlp.down_proj.weight", + "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1537,15 +1488,15 @@ "byteOffset": 0 } ], - "md5sum": "3ee6d1d9d6a0d40aa5c6cda617a5d1bb" + "md5sum": "349f0c70d75d8a410f1b236fd7e395b5" }, { - "dataPath": "params_shard_41.bin", + "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.24.mlp.gate_up_proj.weight", + "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1556,15 +1507,15 @@ "byteOffset": 0 } ], - "md5sum": "a1c9e0afa3d19157d9470ecbe8d02441" + "md5sum": "63960103006f54f63a378784156e1ff6" }, { - "dataPath": "params_shard_42.bin", + "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.25.mlp.down_proj.weight", + "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1575,34 +1526,15 @@ "byteOffset": 0 } ], - "md5sum": "0c88c6fbbe93a15c6d1c34b64769e9ee" - }, - { - "dataPath": "params_shard_43.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.25.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "bf742044703c2a29f0c83f872a30751a" + "md5sum": "0f07069c62ca1e2d8c3e7ee712fe82b0" }, { - "dataPath": "params_shard_44.bin", + "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.22.self_attn.c_attn.weight", + "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1613,48 +1545,48 @@ "byteOffset": 0 }, { - "name": "model.layers.22.self_attn.o_proj.weight", + "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.23.input_layernorm.weight", + "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.23.post_attention_layernorm.weight", + "name": "model.layers.15.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.23.self_attn.c_attn.bias", + "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.23.self_attn.c_attn.weight", + "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1665,48 +1597,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.23.self_attn.o_proj.weight", + "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.24.input_layernorm.weight", + "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.24.post_attention_layernorm.weight", + "name": "model.layers.16.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.24.self_attn.c_attn.bias", + "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.24.self_attn.c_attn.weight", + "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1717,94 +1649,75 @@ "byteOffset": 22040576 }, { - "name": "model.layers.24.self_attn.o_proj.weight", + "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 28332032 }, { - "name": "model.layers.25.input_layernorm.weight", + "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 33050624 + "nbytes": 4718592, + "byteOffset": 28336128 }, { - "name": "model.layers.25.post_attention_layernorm.weight", + "name": "model.layers.17.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 + "byteOffset": 33054720 }, { - "name": "model.layers.25.self_attn.c_attn.bias", + "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 3072, + "byteOffset": 33057792 } ], - "md5sum": "9bd20d328ef42e9cb0daeea34f8f1665" + "md5sum": "cbdd25fd19637b648dff1256b0362d00" }, { - "dataPath": "params_shard_45.bin", + "dataPath": "params_shard_43.bin", "format": "raw-shard", - "nbytes": 27525120, + "nbytes": 55050240, "records": [ { - "name": "model.layers.26.mlp.down_proj.weight", + "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ - 1536, - 8960 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 27525120, + "nbytes": 55050240, "byteOffset": 0 } ], - "md5sum": "3bf660c4dd495b4cb010e090958b399f" + "md5sum": "639c98457bec8c7e83c2c1da8b9410f2" }, { - "dataPath": "params_shard_46.bin", + "dataPath": "params_shard_44.bin", "format": "raw-shard", - "nbytes": 55050240, + "nbytes": 27525120, "records": [ { - "name": "model.layers.26.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "1d8d43c1f77c22de3ed30d08f84db8c1" - }, - { - "dataPath": "params_shard_47.bin", - "format": "raw-shard", - "nbytes": 27525120, - "records": [ - { - "name": "model.layers.27.mlp.down_proj.weight", + "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1815,15 +1728,15 @@ "byteOffset": 0 } ], - "md5sum": "5dba30ad09b4e0832d4b66cbab7cbb65" + "md5sum": "c335e79b08a2fe03d63fa514b658d909" }, { - "dataPath": "params_shard_48.bin", + "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.27.mlp.gate_up_proj.weight", + "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1834,15 +1747,15 @@ "byteOffset": 0 } ], - "md5sum": "3e3f1c5f902e1612e29d8d4bfce8c13f" + "md5sum": "6e0525006bc32f3ec81492080dc9f7f2" }, { - "dataPath": "params_shard_49.bin", + "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.3.mlp.down_proj.weight", + "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -1853,15 +1766,15 @@ "byteOffset": 0 } ], - "md5sum": "dafb2ba9ed30881ead0f758444c710e9" + "md5sum": "d968c5af427bccf73fb824e0be561cd9" }, { - "dataPath": "params_shard_50.bin", + "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.3.mlp.gate_up_proj.weight", + "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -1872,15 +1785,34 @@ "byteOffset": 0 } ], - "md5sum": "fae672c0483259b7c6f78ebe8c9bc007" + "md5sum": "80e8b32cb11f59a6022acf852eea7ace" }, { - "dataPath": "params_shard_51.bin", + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 1536, + 8960 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "c075bcffc12a1d712ca76c1471ae444a" + }, + { + "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.25.self_attn.c_attn.weight", + "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1891,48 +1823,48 @@ "byteOffset": 0 }, { - "name": "model.layers.25.self_attn.o_proj.weight", + "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.26.input_layernorm.weight", + "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.26.post_attention_layernorm.weight", + "name": "model.layers.18.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.26.self_attn.c_attn.bias", + "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.26.self_attn.c_attn.weight", + "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1943,48 +1875,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.26.self_attn.o_proj.weight", + "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.27.input_layernorm.weight", + "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.27.post_attention_layernorm.weight", + "name": "model.layers.19.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.27.self_attn.c_attn.bias", + "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.27.self_attn.c_attn.weight", + "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -1995,7 +1927,17 @@ "byteOffset": 22040576 }, { - "name": "model.layers.27.self_attn.o_proj.weight", + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28332032 + }, + { + "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -2003,48 +1945,57 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 28332032 + "byteOffset": 28336128 }, { - "name": "model.layers.3.input_layernorm.weight", + "name": "model.layers.20.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33050624 + "byteOffset": 33054720 }, { - "name": "model.layers.3.post_attention_layernorm.weight", + "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 - }, + "byteOffset": 33057792 + } + ], + "md5sum": "73cd2787ae936b6aa7f0c3c9c85b3ff2" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.3.self_attn.c_attn.bias", + "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "e15198cbf013718fbac52a1a8d94a4bd" + "md5sum": "136a14789a41480fcc08d89e66ca2501" }, { - "dataPath": "params_shard_52.bin", + "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.4.mlp.down_proj.weight", + "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -2055,15 +2006,15 @@ "byteOffset": 0 } ], - "md5sum": "8ef490e2d5fdd10b563790c6c8ddf751" + "md5sum": "a3f54fbb03f00cc32caaa8b0a5cb8394" }, { - "dataPath": "params_shard_53.bin", + "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.4.mlp.gate_up_proj.weight", + "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -2074,15 +2025,15 @@ "byteOffset": 0 } ], - "md5sum": "0d9f22936600b212ad7509684535ed16" + "md5sum": "2ffd1ebbe30c22f30f61a3d92c032d1b" }, { - "dataPath": "params_shard_54.bin", + "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.5.mlp.down_proj.weight", + "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -2093,15 +2044,15 @@ "byteOffset": 0 } ], - "md5sum": "b877bb9f1825407f4828cde2906e483c" + "md5sum": "2a50d409322f1488c49d5287dd21c3b8" }, { - "dataPath": "params_shard_55.bin", + "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.5.mlp.gate_up_proj.weight", + "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -2112,15 +2063,15 @@ "byteOffset": 0 } ], - "md5sum": "4aeb2df9e320197a7328f274ccc1e0c6" + "md5sum": "89a049c13e34f896861d5989e7af37ca" }, { - "dataPath": "params_shard_56.bin", + "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.6.mlp.down_proj.weight", + "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -2131,34 +2082,15 @@ "byteOffset": 0 } ], - "md5sum": "4159b8469797370880dec9cef854c8b1" - }, - { - "dataPath": "params_shard_57.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.6.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "f6f34569657363a613bb38ffa4a8ce25" + "md5sum": "60e61d289310f677b122617d45a9c7e4" }, { - "dataPath": "params_shard_58.bin", + "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.3.self_attn.c_attn.weight", + "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2169,48 +2101,48 @@ "byteOffset": 0 }, { - "name": "model.layers.3.self_attn.o_proj.weight", + "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.4.input_layernorm.weight", + "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.4.post_attention_layernorm.weight", + "name": "model.layers.21.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.4.self_attn.c_attn.bias", + "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.4.self_attn.c_attn.weight", + "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2221,48 +2153,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.4.self_attn.o_proj.weight", + "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.5.input_layernorm.weight", + "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.5.post_attention_layernorm.weight", + "name": "model.layers.22.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.5.self_attn.c_attn.bias", + "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.5.self_attn.c_attn.weight", + "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2273,7 +2205,17 @@ "byteOffset": 22040576 }, { - "name": "model.layers.5.self_attn.o_proj.weight", + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28332032 + }, + { + "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -2281,48 +2223,57 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 28332032 + "byteOffset": 28336128 }, { - "name": "model.layers.6.input_layernorm.weight", + "name": "model.layers.23.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33050624 + "byteOffset": 33054720 }, { - "name": "model.layers.6.post_attention_layernorm.weight", + "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 - }, + "byteOffset": 33057792 + } + ], + "md5sum": "2859dd48c6a504a6ff5fc777a5d0eb01" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.6.self_attn.c_attn.bias", + "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "4b9635bfbb9a0f8e379594d577e24e79" + "md5sum": "00c023336d6889bb9d24bcb3762ff819" }, { - "dataPath": "params_shard_59.bin", + "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.7.mlp.down_proj.weight", + "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -2333,15 +2284,15 @@ "byteOffset": 0 } ], - "md5sum": "6f06f08ee4e830a7f35fad5489e2df96" + "md5sum": "725c3d334464020a85016e629f58eda8" }, { - "dataPath": "params_shard_60.bin", + "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.7.mlp.gate_up_proj.weight", + "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -2352,15 +2303,15 @@ "byteOffset": 0 } ], - "md5sum": "6a51b2acc4eb22511d78c73146e97c57" + "md5sum": "09c4056fb69cfcce7f2c9829ba155e28" }, { - "dataPath": "params_shard_61.bin", + "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.8.mlp.down_proj.weight", + "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -2371,15 +2322,15 @@ "byteOffset": 0 } ], - "md5sum": "422c9f65efd3c517f5f597ff198b9b54" + "md5sum": "898fa6e97b95ddc00a3a685714dd2056" }, { - "dataPath": "params_shard_62.bin", + "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { - "name": "model.layers.8.mlp.gate_up_proj.weight", + "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 @@ -2390,15 +2341,15 @@ "byteOffset": 0 } ], - "md5sum": "194381860a8101e6639b37c321b03e53" + "md5sum": "ae5ce4d265237aae0fd576bb6f528dca" }, { - "dataPath": "params_shard_63.bin", + "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { - "name": "model.layers.9.mlp.down_proj.weight", + "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 1536, 8960 @@ -2409,34 +2360,15 @@ "byteOffset": 0 } ], - "md5sum": "dcc2852e43b9cca8d247abb596a6cfd3" + "md5sum": "58375129ef74f23538d522ef09315b51" }, { - "dataPath": "params_shard_64.bin", - "format": "raw-shard", - "nbytes": 55050240, - "records": [ - { - "name": "model.layers.9.mlp.gate_up_proj.weight", - "shape": [ - 17920, - 1536 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 55050240, - "byteOffset": 0 - } - ], - "md5sum": "31b0adf54bda832e421327cc270f38de" - }, - { - "dataPath": "params_shard_65.bin", + "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { - "name": "model.layers.6.self_attn.c_attn.weight", + "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2447,48 +2379,48 @@ "byteOffset": 0 }, { - "name": "model.layers.6.self_attn.o_proj.weight", + "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 6291456 }, { - "name": "model.layers.7.input_layernorm.weight", + "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 11010048 + "nbytes": 4718592, + "byteOffset": 6295552 }, { - "name": "model.layers.7.post_attention_layernorm.weight", + "name": "model.layers.24.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11013120 + "byteOffset": 11014144 }, { - "name": "model.layers.7.self_attn.c_attn.bias", + "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 11016192 + "nbytes": 3072, + "byteOffset": 11017216 }, { - "name": "model.layers.7.self_attn.c_attn.weight", + "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2499,48 +2431,48 @@ "byteOffset": 11020288 }, { - "name": "model.layers.7.self_attn.o_proj.weight", + "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ - 1536, - 1536 + 2048 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4718592, + "nbytes": 4096, "byteOffset": 17311744 }, { - "name": "model.layers.8.input_layernorm.weight", + "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ + 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 3072, - "byteOffset": 22030336 + "nbytes": 4718592, + "byteOffset": 17315840 }, { - "name": "model.layers.8.post_attention_layernorm.weight", + "name": "model.layers.25.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 22033408 + "byteOffset": 22034432 }, { - "name": "model.layers.8.self_attn.c_attn.bias", + "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ - 2048 + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 22036480 + "nbytes": 3072, + "byteOffset": 22037504 }, { - "name": "model.layers.8.self_attn.c_attn.weight", + "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2551,7 +2483,17 @@ "byteOffset": 22040576 }, { - "name": "model.layers.8.self_attn.o_proj.weight", + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28332032 + }, + { + "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -2559,48 +2501,76 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 28332032 + "byteOffset": 28336128 }, { - "name": "model.layers.9.input_layernorm.weight", + "name": "model.layers.26.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33050624 + "byteOffset": 33054720 }, { - "name": "model.layers.9.post_attention_layernorm.weight", + "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 33053696 - }, + "byteOffset": 33057792 + } + ], + "md5sum": "50113496b65c91d08a408ca51382050c" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 55050240, + "records": [ { - "name": "model.layers.9.self_attn.c_attn.bias", + "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ - 2048 + 17920, + 1536 ], "dtype": "float16", "format": "f32-to-bf16", - "nbytes": 4096, - "byteOffset": 33056768 + "nbytes": 55050240, + "byteOffset": 0 } ], - "md5sum": "80e3191e706ecd10535e81ffa38fc3c2" + "md5sum": "77bba71a9b67ed4377d409949e9ad145" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 27525120, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 1536, + 8960 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 27525120, + "byteOffset": 0 + } + ], + "md5sum": "0d6ad7eb791649a022f869b1edeb5355" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", - "nbytes": 11013120, + "nbytes": 11023360, "records": [ { - "name": "model.layers.9.self_attn.c_attn.weight", + "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 2048, 1536 @@ -2611,7 +2581,17 @@ "byteOffset": 0 }, { - "name": "model.layers.9.self_attn.o_proj.weight", + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 6291456 + }, + { + "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 1536, 1536 @@ -2619,7 +2599,27 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, - "byteOffset": 6291456 + "byteOffset": 6295552 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11014144 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11017216 }, { "name": "model.norm.weight", @@ -2629,10 +2629,10 @@ "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, - "byteOffset": 11010048 + "byteOffset": 11020288 } ], - "md5sum": "554e351ae89de4c09af73e23fd71576f" + "md5sum": "29c619ad6174a67fe28726e26cc3974d" } ] } \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin index c8d5890a7299efdd307ef3834f18961a90ba1fb3..3a7f2ba70622aceeac41ea29f608d0e32ce76f79 100644 --- a/params_shard_0.bin +++ b/params_shard_0.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43256d0b8f0c812f4ea9e707c40695df8e357fda9eb562d6728a237a534b7f7d +oid sha256:94273a5d0232f316bd22dff6d3c74e5d3a3ec943550d50c17e986248bfad3a40 size 466747392 diff --git a/params_shard_1.bin b/params_shard_1.bin index 5c143919630c70c5dce0573437f6efcd29319c49..1d0eac5aec787049772895960a6be084bcdc1fcd 100644 --- a/params_shard_1.bin +++ b/params_shard_1.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:deb7fe1b6af71a7ed9f5143f6f05feef2de12625e98f072098b8c3ecf4664578 +oid sha256:9f48893f4c00c5aebfa08fd4670be6a7feb1cfd3bcee56f1ae8247bbc8bd049a size 55050240 diff --git a/params_shard_10.bin b/params_shard_10.bin index fc4816e5b898653aa776e53f4871acc3baf90153..0b25770216ea7c894d7e82dad62ba104cff2a6eb 100644 --- a/params_shard_10.bin +++ b/params_shard_10.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b1a08a299b0cbcec7b00e7edd5ee7e1fadea6d40fd67302d09d8f690f65bcfc -size 27525120 +oid sha256:5e1150a9235cae04dedd70dceb6d04b68063ba898adbccc3a6eae532e167d813 +size 55050240 diff --git a/params_shard_11.bin b/params_shard_11.bin index aefeb85f3c57996d3cb74b66352d4b818b676668..1041ce7b7d2dec7e4fe26787617f515e7f00fe5b 100644 --- a/params_shard_11.bin +++ b/params_shard_11.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:adef7a955ab6fb904a2a487995eee313a7355ed8fee992713318cd0ee06e3d50 -size 55050240 +oid sha256:847367775f9a7ed97d56b9b4aa085e9d21e6119c81b4f64ada061e5ec04def19 +size 27525120 diff --git a/params_shard_12.bin b/params_shard_12.bin index c78640d26d5514d9a6733ebff6d91c2b7fa04643..8ffb3d4c0e2ec2bd73d6e6cc39a15982fa7e593b 100644 --- a/params_shard_12.bin +++ b/params_shard_12.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1818ed266119190b631d79b35f61c9b9ea1498ecb39fbc993f373bfe07e8a6c1 -size 27525120 +oid sha256:ee74e3d0b230249cc282287f91b65113e4e6c63f1c90af6a7d9c6416f615b466 +size 55050240 diff --git a/params_shard_13.bin b/params_shard_13.bin index f3c99aeb9b605cf4f02bfddac8f1c7e171622f20..4f9a60ccc923e93f1203a43b296db0d40387f793 100644 --- a/params_shard_13.bin +++ b/params_shard_13.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a69ef695f83288a716f46ca0b436bab0aa3843ac3e844c795cb5c5c3bd00dc8c -size 55050240 +oid sha256:580e39e24a773bc9a2ad49ec4705f6ee63043369f0cc608fed39fbedbcff5cf0 +size 27525120 diff --git a/params_shard_14.bin b/params_shard_14.bin index b374abcf5c7e43a66396d7b60450ca1e116a7b9c..52b81e1bdef5ef0a16b4abbba3098627d96b573d 100644 --- a/params_shard_14.bin +++ b/params_shard_14.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:449e6dba8fedfb24594ee2e2f7786f434735d7ef2cb1adfdee4ccb161fdb78a0 -size 27525120 +oid sha256:8fd13328e1345ea0fdf8c1307bd555ba86a80954f3af9c537f40769226a03b10 +size 33060864 diff --git a/params_shard_15.bin b/params_shard_15.bin index a48d2c36f12b74f0396e5ef2f16bf8b33cf4affb..74b822fcdba9cddff95ccbe326cf82fe04c87550 100644 --- a/params_shard_15.bin +++ b/params_shard_15.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d843c125f9b29289555a46ddb82eac8da82c2a58abd24425ed966a831f700c76 +oid sha256:9c28b7cf43e1e75142720bed8c0b81d382385bb75c707ae19f157b534f9f6c98 size 55050240 diff --git a/params_shard_16.bin b/params_shard_16.bin index c296f67666588a22b37d05e0f705437dc2be2a4c..477f1cd3f0c7abd5c0b6d585eb2cb95ba83ef1a1 100644 --- a/params_shard_16.bin +++ b/params_shard_16.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6907647e513bb68e61bcd9ae28b260b42156ba31245d844af153cfaaa3846b7d -size 33060864 +oid sha256:e6ba31cfd7078dd6670de840a71f45b0213a95ab9db7d0dce90909380bc77344 +size 27525120 diff --git a/params_shard_17.bin b/params_shard_17.bin index 465adcef307d7c0c4b8563119ffe71cd41ba2d8f..29ef1cb5bc6f19b687ea132210b4092372cf4b1e 100644 --- a/params_shard_17.bin +++ b/params_shard_17.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc929db04646a836322e8bf7cdf27290ad9aa06ac578b42fcdd3a1deec0b969c -size 27525120 +oid sha256:28795da2c98c13ff4969befc676b2b1498e546f511081964512dfa4198cef7f9 +size 55050240 diff --git a/params_shard_18.bin b/params_shard_18.bin index 222d93774a4842a7291870830c9c19f1e78a7958..b3e74d4bc04843105368f82496adaf034e5f399a 100644 --- a/params_shard_18.bin +++ b/params_shard_18.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98fe8b376d94da887eb2239a6c8ababe4ffb43b4a844009df634b6eee810cb87 -size 55050240 +oid sha256:13d0710846a52f8b29163de4dccdbc2564cb7326e24a791d2071944433a48455 +size 27525120 diff --git a/params_shard_19.bin b/params_shard_19.bin index ccf6bfe6b0f90f1ded01c5013130b49fdaed0b01..a66038e69b549e9f35d4defe9b90728e6e20e4da 100644 --- a/params_shard_19.bin +++ b/params_shard_19.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1efba48ad4e5b6350840e437e59d99f8259357098ee18f26523058e64288231 -size 27525120 +oid sha256:80b08736da97358973209674ec7d1338f141a5a784c4829c651ffe80bafcb4b5 +size 55050240 diff --git a/params_shard_2.bin b/params_shard_2.bin index 9259a5dd36fb215ec47e8f229bc2ed946689f358..6a162c98a7dc54760eac738ca13e946c77717d45 100644 --- a/params_shard_2.bin +++ b/params_shard_2.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfa31db2ceea48f0283cd2790cf3ebd78280772f9b93abb45213630b6d1cd542 -size 27535360 +oid sha256:320c7e9478a4b42ede149b8ddb7e551137f28de3ae409bddc5346161aaee37fe +size 27525120 diff --git a/params_shard_20.bin b/params_shard_20.bin index 69cc84728fab70b291592637ae1ae046e442f30e..43a0b3bf56e85ff2ede2fc6aeafccd226980278b 100644 --- a/params_shard_20.bin +++ b/params_shard_20.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a5dd1b569f309062db653b3ab82c5094d122a217fe31e728da04ea95ba10215 -size 55050240 +oid sha256:276b7878180dd1bd37eb33e7ef4adffb76fb147a85789a23ba06a2bd496c41ce +size 27525120 diff --git a/params_shard_21.bin b/params_shard_21.bin index 6d2d771eafd3cd5d144edbb0c60937f602304001..2a71a979f82452aa6ea6e5eee2d155e565d37dc3 100644 --- a/params_shard_21.bin +++ b/params_shard_21.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a384fdab3fed67beb6dcecc11e07a7a3abc2509b45d2435dec315dcfb5233599 -size 27525120 +oid sha256:64a21140ae18945af8b4d14f0e8a44607ee9e65002040505c566f01ceb5ec648 +size 33060864 diff --git a/params_shard_22.bin b/params_shard_22.bin index d4fc6e92cf57f41b493adc3083357ebbcf558acc..7b33783247987d097e871eb42f7a0bfa9db219f7 100644 --- a/params_shard_22.bin +++ b/params_shard_22.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4df39d8fee545fb91a042cb7baf9ff19c81c090abea017764d1a25859f717f43 +oid sha256:ff99ab52581293e3b1f5d04ae469da981480335c5473dfbec12f9c608efa8978 size 55050240 diff --git a/params_shard_23.bin b/params_shard_23.bin index df7b3901fd10a9d02d7a2c360af40b39cc6b8a85..a984fef32408c9685f46ac764fa942f075f321e2 100644 --- a/params_shard_23.bin +++ b/params_shard_23.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f82e64e2554048138286ff10af732add567ecd58ecbaae1b6f137a14f94b6860 -size 33060864 +oid sha256:d8ebcb3fc9e9b3d34044d91b663adb90c0c64ac626b16fd272d9f98c7697ceaf +size 27525120 diff --git a/params_shard_24.bin b/params_shard_24.bin index 8892d7ede473d5e4da25992bf341c0ba6d0cc45b..a24236e2d422def2dc2c39ba7bce41d23a255bc4 100644 --- a/params_shard_24.bin +++ b/params_shard_24.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e84359eb974a34887946eaad63c9c67c225a19482f38f3d3fd0535dafaef4f01 -size 27525120 +oid sha256:3a4c22ba0ecfc4ebe4efb3ac4965d430663abff3e94c74813c3fd9978ae9f84d +size 55050240 diff --git a/params_shard_25.bin b/params_shard_25.bin index fa9ebf26bfc2afd442963bcdec2d7c117e0b5f99..c1452a5b2ad8f3ef3e2f4c8cf4afdefc1c9a09cd 100644 --- a/params_shard_25.bin +++ b/params_shard_25.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:093fa2e1c175a290654de5ff3b0082189265bf89bf6ac9678e7076e8e4f1bd54 -size 55050240 +oid sha256:1d5f3b525cd3a9a2a809e107998308684809fed8e48296f195022ef26300b70e +size 27525120 diff --git a/params_shard_26.bin b/params_shard_26.bin index a5cd2df9dd693d0b8e9ef213c2ee5760e67d7179..8b2a7b4e3d72afa5e09b1d880cbf22b09074d9c7 100644 --- a/params_shard_26.bin +++ b/params_shard_26.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f77ba1447d90a3b17db86f2d8702243cdde48a48d7ffc86dc0280ce953a3757 -size 27525120 +oid sha256:1c317068124b7add21e049e1b83bff85ce5215426bb184a0f6239b2c273b3224 +size 55050240 diff --git a/params_shard_27.bin b/params_shard_27.bin index 316001ebec70c1de02271344425b5be1a44c38d0..db20dcc9818cdeeead7d51753bf8dd7f78ce6a0f 100644 --- a/params_shard_27.bin +++ b/params_shard_27.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10d5003fa57b73e7efbf709f523372e5e57a6eb3b5c70de4353d7c78db5f5a54 -size 55050240 +oid sha256:797709528e8159253b3766d1bb500a6bf176b78f1af95b32a42f12473bac4265 +size 27525120 diff --git a/params_shard_28.bin b/params_shard_28.bin index d58104f1253d31ff6e9685516b3b0dea392aa947..7921db38bc7c9c95fd0810dc219b3034d5ec5441 100644 --- a/params_shard_28.bin +++ b/params_shard_28.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44dbc3426ce5c3a819ee8d7cc8f3423b69cac54b749633e46dc61e14f6652f61 -size 27525120 +oid sha256:b4d0342ca260d8f1b939669678d3172ac78e2099f0ee6ee79ce12acb10fed6a4 +size 33060864 diff --git a/params_shard_29.bin b/params_shard_29.bin index e9c6ab75fb58e8f8d6ca7edfc8b498f63e0593ad..e4f8eab1bba6d552cbfcc26a799d081991945e2b 100644 --- a/params_shard_29.bin +++ b/params_shard_29.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:179573335b56c2d367c7222e16ea1a2ebed2e09d18face3a640d5ed55b31159b +oid sha256:a0d6106261d8f4c5538e122de63e72def7ab8ef0726fa03486e7d1a71e0b187b size 55050240 diff --git a/params_shard_3.bin b/params_shard_3.bin index 6842e5370fac2ccde2ebab9615347ea78d83d4ee..ac8c7767c0e6b602048316c7e96e9769d537b132 100644 --- a/params_shard_3.bin +++ b/params_shard_3.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebf06be4638d2fc49381c37acd29778278510558ba16bf985c3cc7f6189c6baf -size 27525120 +oid sha256:61f4164679f238e92f28b009c00309d86f47481271ad9d5a51289804abd4582c +size 55050240 diff --git a/params_shard_30.bin b/params_shard_30.bin index cdb4b45d497b796aecdc88960ea92602103b841e..7ce9cf73aa0721588339f67d8f26f2649cb231c2 100644 --- a/params_shard_30.bin +++ b/params_shard_30.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f48a91d7ffd46ea606b64a11f169c91c9e6d3949e2876f36b8b98325c135007e -size 33060864 +oid sha256:8a5673680d5cafef38bd4782ab8c3904718f2b1c78d3ed51a6c49fc9b04866fa +size 27525120 diff --git a/params_shard_31.bin b/params_shard_31.bin index 87adcd92d8c90a17c0dc88988ba992f8b9af6ba4..56daa8877bdb6b1ab78b33e499cfb44d60eb48b1 100644 --- a/params_shard_31.bin +++ b/params_shard_31.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd6a9b312e4c50c3707d7294e2da823b693c8d76b2edc297ad0b14aedfea467d -size 27525120 +oid sha256:1e2dbeacdd8554b3b11f3d40e03f1668a89a82d506e6221da8e24f7500998371 +size 55050240 diff --git a/params_shard_32.bin b/params_shard_32.bin index 7911cbc33eda97d909be89f8e6dedd0a82d66d64..d8a8d01b0854bdb020039b64cbe96cdda6367c1a 100644 --- a/params_shard_32.bin +++ b/params_shard_32.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00a0a8bf883c445b2ca7832c1e4cf69d554730f7883e51b2053b75cf40c8ec08 -size 55050240 +oid sha256:e5bc4fcccb8f3c3ee62ef7f9cbe52b3302f6c8d2d0a1a2475b69e25d83dca62e +size 27525120 diff --git a/params_shard_33.bin b/params_shard_33.bin index 2c5607280aad361af9951650ae18fde526056230..92238826e67a87d2353b02097c994c5de939c62a 100644 --- a/params_shard_33.bin +++ b/params_shard_33.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0b388cbd7ec068d7e355ddace0f8084fa561ba2cfddf2bd83cb3f1c0474ac15 -size 27525120 +oid sha256:2dc920714790561ed7d0f417c9b9fb622d03a7c7000240cd6bef37cf2ea71bfc +size 55050240 diff --git a/params_shard_34.bin b/params_shard_34.bin index 0719c5f321fc7e06f8ed9c912fff87f29fe9eeed..4d5f39cc8d5c714cd84caecedd0ed259ecd0deee 100644 --- a/params_shard_34.bin +++ b/params_shard_34.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01ee2bf65adcd63fd5129e751e3c5c5d8118a4ba52d48639a9bb9429384d31d3 -size 55050240 +oid sha256:371479f149dad1d147116a1b92076448d7fa34e122c410eb37ea5764b4d05e32 +size 27525120 diff --git a/params_shard_35.bin b/params_shard_35.bin index d28c9901d2b3d7706fc927b8d7eb30aa7b3b392f..0ce93d2f65a778e9f445367689fd8b0c32d6f8b9 100644 --- a/params_shard_35.bin +++ b/params_shard_35.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41270bd5669b5ecac3b60ad096b87be7119afe8ba9b65050e6d3fdbd43c3c3f2 -size 27525120 +oid sha256:f4ad4fdb9d60933c373bfd9cc5f65a0b2a1582e7556a6fa47705379593bec87d +size 33060864 diff --git a/params_shard_36.bin b/params_shard_36.bin index fd39dabdfd3eb64914e68b8d74e5f87c74d469f0..adb8a7b6fdefe937288834576354b130643f7b61 100644 --- a/params_shard_36.bin +++ b/params_shard_36.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62a1feb183eaf5137d15b0889e3163c00738bf24edc7dc0f92cfa3bc33ca6c75 +oid sha256:c3419c2e859b925fd2b1828ce49b8a2a45fd80b747b5f0ed34585c39d7dac6a3 size 55050240 diff --git a/params_shard_37.bin b/params_shard_37.bin index 2dfcf0bc12c3f85ee0449ce863bcdaa0f0cac801..58feab12a94975737b233b5f73a4ca1ce7c3aa2a 100644 --- a/params_shard_37.bin +++ b/params_shard_37.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03381d0fa4f59b3243a898af57d87935e28643a8e77e2ff452416def1b96c6e2 -size 33060864 +oid sha256:4ccdfb5e4d32660dd7998683ad4b4b064b5cbdf57f410fdcf838695dba3ede17 +size 27525120 diff --git a/params_shard_38.bin b/params_shard_38.bin index 1aef2c6564a8c60ec9067c5dcd91547bbcbfb195..d12da52fb826451e6ccadc6da0bb297caaabc5c1 100644 --- a/params_shard_38.bin +++ b/params_shard_38.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8fbfe7265ec75666e68673f1bc5eaeb410cac5217fe3093abf99d825ba5f891 -size 27525120 +oid sha256:350bef7aad28ee53bc44eb9bd166d3537f828173639fabf97094e5f27f113ffe +size 55050240 diff --git a/params_shard_39.bin b/params_shard_39.bin index 33cac0cb40a57745d5d37d1f216baff0361b0008..4bbbdd009a1058cdc1673a93765a1a9a53ef5712 100644 --- a/params_shard_39.bin +++ b/params_shard_39.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf686aa6fa4bf3d5c4bdebb6efd8e2a20ed931bb42958328c9fc19cd77703df4 -size 55050240 +oid sha256:b150b489c765c4cc6dbdb4ab2fda019304695c60aa60c8801f831c7974bc02db +size 27525120 diff --git a/params_shard_4.bin b/params_shard_4.bin index 86676371b06baaf46dbbaa94802444f8c56ecd4b..f40b5d3d102dafd96f04dc2eaa134a028ad51bcd 100644 --- a/params_shard_4.bin +++ b/params_shard_4.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6123143e5a495eebd4a824682aa78eae0df1c59f8167f9b9c1c158af8292b6bd -size 55050240 +oid sha256:d721b338259ef82c81f041ae383819256838b602e51005fc735b3e103c3ac8a9 +size 27525120 diff --git a/params_shard_40.bin b/params_shard_40.bin index 20ee000dacf87328aa76586f51433864f03dd777..3640d44a843dfe57421b62abb805f687808abbb3 100644 --- a/params_shard_40.bin +++ b/params_shard_40.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4698bdd0cce98f3f6c6d7748d8b21b34f3883d346f8ff954c5629e92df26c17d -size 27525120 +oid sha256:f8c0ca3aa80c3e599bd4d92fdf0c70e933be98d10b03ec0d8763d2f21909e285 +size 55050240 diff --git a/params_shard_41.bin b/params_shard_41.bin index 7287b56b46c918b1a5759d61016e2074a8912cdc..03c3eb042c4aab29fcd88907421ba757e857b728 100644 --- a/params_shard_41.bin +++ b/params_shard_41.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9f0251fca3e6776fb7e0df0505d18464f838893e72ab2e130a6d6b637d63a39 -size 55050240 +oid sha256:e9c67c94742047daa9d5c4ed31b6fbed6d1768cba8665826e49aaa267d3badf5 +size 27525120 diff --git a/params_shard_42.bin b/params_shard_42.bin index 94cd4f80239571d3b4b4b99a769ab6f7ad46f0a7..1e2dbb05bcee6e5ca285a132c3ea5a1c87643782 100644 --- a/params_shard_42.bin +++ b/params_shard_42.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d8f96cbf4f71e47c597f971b02b4cc0cc6bffe32ed3e3f735f194109fb6fd1d -size 27525120 +oid sha256:e8e44cd509e0318a0608bdd2af75a8ef35029e389ecd09f7f89ffd9877091f77 +size 33060864 diff --git a/params_shard_43.bin b/params_shard_43.bin index 088010a4f55c3edf693c8fa566169c54c9307ccb..bff69c8cebf68c5d198dc22bd2a67686a0ab26a1 100644 --- a/params_shard_43.bin +++ b/params_shard_43.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b31b86c1bc0a55ccbd5c66d01af04f5cc5ddd575c255a5dafdea449a7865d8cc +oid sha256:9755e83da9c89b54d4a2345252ee44d1c3388bfe9f67a265e4b12c3558cf2757 size 55050240 diff --git a/params_shard_44.bin b/params_shard_44.bin index 9f1142e5f7aeec7fadfd1010422de9672b15739a..a1528c017f252b09fd57e13149c538235cca74fd 100644 --- a/params_shard_44.bin +++ b/params_shard_44.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5274d241190d48484674d6626862d4dacca47dcb1a309d15dd077c925c4d974 -size 33060864 +oid sha256:6ff4ac3e08407d6b9dc2ccf54152f034a248789fc79ba734a66bf27936748795 +size 27525120 diff --git a/params_shard_45.bin b/params_shard_45.bin index 20f6020d7d856802a98d9a0b5a25d2d72443b62d..7826198660bf488a0cfe12d5e2ca76e85c0c3cfa 100644 --- a/params_shard_45.bin +++ b/params_shard_45.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08fd0059a61104cc99b699cd7ba2ca0dbb49c70f33ad55e79ae1229cedff5bea -size 27525120 +oid sha256:a02fc140d272f78a5c78ff8b11298843d2f37fa1fccfd20138120e1bb4ae212d +size 55050240 diff --git a/params_shard_46.bin b/params_shard_46.bin index c2cc946ebf56a2f70983500361783dd1f087314a..9375560342d134d54d25b9ac6025c7528ce7ee5f 100644 --- a/params_shard_46.bin +++ b/params_shard_46.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f126b5b6bace80292d0a0fc05e422d6155c65a92119b842901e929325131ce9 -size 55050240 +oid sha256:b04a632d1f27795b16af5a571fb3dc97b123ea3feb2788a6699547b22800ff1a +size 27525120 diff --git a/params_shard_47.bin b/params_shard_47.bin index f07074b549b1f4efd3a076777886444a58be1664..f9762c9b7d8ded9e9e8807281e62f99731749249 100644 --- a/params_shard_47.bin +++ b/params_shard_47.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01a5c73cc359c686d657a7af534dee374b1f3efb13a8963b786bb4ee80ac93ce -size 27525120 +oid sha256:b20bd2fca44db6684afc54d8c8c74841152c483fcd214be510c6b70d6d6a5d04 +size 55050240 diff --git a/params_shard_48.bin b/params_shard_48.bin index 1d136517d710b89f8d8ba02cecda65e5d0d3340d..34eb4c446ed9b5a5beb8aa2be1ab3fcf0d8c372a 100644 --- a/params_shard_48.bin +++ b/params_shard_48.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc5ba3bd64509b2e3c8552e780564318710f41d0d9181d89ce5d025a40eaa236 -size 55050240 +oid sha256:04cc735e97f44125b7aef33f149a4fbfb93061cbccd344173fa32ab4ed319bd1 +size 27525120 diff --git a/params_shard_49.bin b/params_shard_49.bin index 2d642aead1c4a0b8f014f93a0ca08593854043f3..6e85601668c01704bd2a0e4a6c901c4c24196bf4 100644 --- a/params_shard_49.bin +++ b/params_shard_49.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbff323956b5163b6fb8a126ec32a4709e1c2e7738dcc5fd12a70b7a238adcb5 -size 27525120 +oid sha256:da9ce3a0353e0c1969e66338e1c5265b24aabb05412fc9fdfc5b4b48d212c586 +size 33060864 diff --git a/params_shard_5.bin b/params_shard_5.bin index 63b513796acd8ae00d1eb7dad8c49561ed218206..33284fbbeb5b115cb73537f6de8965d715375e7f 100644 --- a/params_shard_5.bin +++ b/params_shard_5.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dbb502b1851feac98cbd43401dc522a59f84328b7283f6ea589223306f540ed -size 27525120 +oid sha256:12926276bb36201240a01dd532cfa635bf7ae9d1127d50bb16347776efa3867b +size 55050240 diff --git a/params_shard_50.bin b/params_shard_50.bin index 496e8bb0980a490e43754278ad9baecb411c1e4b..bd064916e93bf23a13ccf8b3a1f9a799803c7ec1 100644 --- a/params_shard_50.bin +++ b/params_shard_50.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec244f30f958e360cc2ad35d50bda88af6d037cc30f2db3e8228c4c1ee687a38 +oid sha256:3fe35eaa0c219453bcfcae4175f9fe8e25983afd7d82fb676c6335f835ba39d1 size 55050240 diff --git a/params_shard_51.bin b/params_shard_51.bin index 735a7245d0c49ae366d5f1c1460ffd740d9e5bb3..e806fd7d482fd14d91df50d4347e23467472a71f 100644 --- a/params_shard_51.bin +++ b/params_shard_51.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e8f7508ecc047dee87531ac31cf76db7bf24b165948e0ae7f5a53cab47bf155 -size 33060864 +oid sha256:0f737bab297728d8fb9de4d6c64f5c972824d8a4baa1103682efad4a6200ca31 +size 27525120 diff --git a/params_shard_52.bin b/params_shard_52.bin index 6a0ca80b9ebcf6fe68e9834fdbd609efce3dac78..194d50304fad41a5fabe96d94fa2938195519a2a 100644 --- a/params_shard_52.bin +++ b/params_shard_52.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e29ae520068cc5441f134fccb86f107caae8b725c77f0603d0dc2c3253518422 -size 27525120 +oid sha256:07676e825d750264fa49a426693214d38ab8eb4972b373a7165da8857179b34d +size 55050240 diff --git a/params_shard_53.bin b/params_shard_53.bin index a084a23f19760014ef7fcd567d635392c978afa1..9fc12b5ae7e83e962f11b57ca88d1cbdc11950ca 100644 --- a/params_shard_53.bin +++ b/params_shard_53.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14cfa61c3bc226d02d330649de62596e21c32325bc2b7d4156154d35b5c47b1d -size 55050240 +oid sha256:324fdfcf9690b0191b8bd33592ddd938573862815402bce36946e257e2561cf9 +size 27525120 diff --git a/params_shard_54.bin b/params_shard_54.bin index 0ff863c36a516cff67a10b7f12e6833a698351c0..993523880dc8fadf9c1fe385baa9a4fd55748085 100644 --- a/params_shard_54.bin +++ b/params_shard_54.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:160e946c36b38df78c928423d271c5a73db2b3418267911acd15676ad6079db4 -size 27525120 +oid sha256:15e360797bfa6522b2ee3f178d2e2188375bfc4c7783428f03c4ba51a78c62b5 +size 55050240 diff --git a/params_shard_55.bin b/params_shard_55.bin index 6cfc91585c4840874188cbd2db74b91a6986b71a..53c033e05310495d837fd5b3e674566f34201b98 100644 --- a/params_shard_55.bin +++ b/params_shard_55.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45b7ab6eb19f69b8d4efb2c973497f17eb0de905199739fbed9321545f638bf6 -size 55050240 +oid sha256:5b5086453fc0477be6ab7e9cd989ac1e9e674694fa21ae47324d389330e6f6c0 +size 27525120 diff --git a/params_shard_56.bin b/params_shard_56.bin index a640c21c26fb11d9c19b13c926444cead19e15ce..6281877dd86919df92ace14ee20669a443640000 100644 --- a/params_shard_56.bin +++ b/params_shard_56.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89d8eef59460851ba02c18bc34c3b9a7cd7791b53331eccb87a7471d23f21cdc -size 27525120 +oid sha256:5e4cfcb07256dc5a55fdb5f212f88ba2f11ebe19c7cd729f115f6ed7b1ab16de +size 33060864 diff --git a/params_shard_57.bin b/params_shard_57.bin index f26a0980d79fed385581d91a795e64099c4db546..168ecafa91406275fa823cc895497e328fd48463 100644 --- a/params_shard_57.bin +++ b/params_shard_57.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89dbbe8c9561c0c11bd9d5bbd27b5f262b4b7326f8067f784b61e5199b590223 +oid sha256:2819215224068835be1ddf6eb7638eba33ebacc7b03e67aa1ec893a0f997c154 size 55050240 diff --git a/params_shard_58.bin b/params_shard_58.bin index cb58415e85f3d36c3cc25cb3d47a3c568bdd0f3a..35539abf23ad20da8d622e64109601acef7bbeb6 100644 --- a/params_shard_58.bin +++ b/params_shard_58.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:842f81ce0d3e5215f524d07f2924ab16529311714cc9d9456981f7adf8f3a11e -size 33060864 +oid sha256:bc6adaa9ce55ba772129b771d8610ebc05256e7c7c381d09e79ed91fff423b46 +size 27525120 diff --git a/params_shard_59.bin b/params_shard_59.bin index b1540da52a0319930ac0b1a49ea42c8e510739a6..b839e1af6db561cc0600f099fc32dc87f3e0fd4f 100644 --- a/params_shard_59.bin +++ b/params_shard_59.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:819ad6faa0011ce2a26d9cd3027d031e51f3fccc9ec9eec1fe9aa04d7c95af4e -size 27525120 +oid sha256:544a11b17f68b6686acc00f2d92aefd5d4ef24b4dbd8f6d4ed43c481f3c711cd +size 55050240 diff --git a/params_shard_6.bin b/params_shard_6.bin index 3a809cb24af573748bf9ea3a088085dd737c82ec..237c3ad331afe24f75ce60da12b079eab572f546 100644 --- a/params_shard_6.bin +++ b/params_shard_6.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04bb2fd453410e41bc3cdcbdc94d81d0834fd49dfb0d66179eba2a578fcef558 -size 55050240 +oid sha256:ac7ecb226e6bb71fd6912ef3aafdb27f91e1ed4e378ce2caddf3fe66f5ab58d9 +size 27525120 diff --git a/params_shard_60.bin b/params_shard_60.bin index 0ba362f73a3636c11c28dd3c659de9a91848edec..123e4b3787b14d922cc7edd02b03fc6d6b128dda 100644 --- a/params_shard_60.bin +++ b/params_shard_60.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55352f7973c577de4c265e2bf03d5e0264b941300383615d15fa1ab1cf81bd53 -size 55050240 +oid sha256:c79b3be76e5fcc16097a0f49bf7ee280b07ff01a521e74b53c36eba5c4ce4bd6 +size 27525120 diff --git a/params_shard_61.bin b/params_shard_61.bin index 3f5f9ec746e55a68c3d8d6c46e7e476d6845ff97..512ef84c5ff30f5ad9359ac2ef3fbaa4b9b6494b 100644 --- a/params_shard_61.bin +++ b/params_shard_61.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5002f3742932bb4ca6563d922746055634bf5ba9eb60396bbcf585a6222af35b -size 27525120 +oid sha256:a86f5189a5c58395ca5631b2f8f39d9b1676a99b32a22f39bad999f9aa45f00e +size 55050240 diff --git a/params_shard_62.bin b/params_shard_62.bin index e56d248117485a7e114807285687baa5e96277a6..c3727c00894697bd84eccc8cf8e4975fcaa0d33e 100644 --- a/params_shard_62.bin +++ b/params_shard_62.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f72bfd92ef9861339fa9302dac4f68b4cd5e9d208965843eb0ce086c9a344d45 -size 55050240 +oid sha256:072744cb1ddb3b9eeaeb0692f1554d4e2ddecd0df33a0323a32e8117c13b588b +size 27525120 diff --git a/params_shard_63.bin b/params_shard_63.bin index 030c709efeebab3f03b5807e2500a9adf764904c..29f937cd151b5f568895206d832c408746a79083 100644 --- a/params_shard_63.bin +++ b/params_shard_63.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02dcf59628943e8b35a39c005e883d6fa38e903b863bfbbd37baac672ec71989 -size 27525120 +oid sha256:2d2dccf094f6d65b8ac23947f4ff505176b5eb7c3ed64794227c3cabb0a8f1b2 +size 33060864 diff --git a/params_shard_64.bin b/params_shard_64.bin index 907391e59c05c9c28eeb63a3d64686a13bbc0ad3..3472f335e922c44117d77a382a5e8c09bc4666c5 100644 --- a/params_shard_64.bin +++ b/params_shard_64.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c83f6c02274653f662d50b02b0febd5ce68f97f2ec2cdfe93308b2f4b417380e +oid sha256:70a1c05274d548de00369e5bc153b0e2062667dd7c258a29025a907e05209d3d size 55050240 diff --git a/params_shard_65.bin b/params_shard_65.bin index eee20e0a2650a0ee494ee9ec9322115334bd7834..ba2949d43d15a527332e131e5f8a1076828fcff5 100644 --- a/params_shard_65.bin +++ b/params_shard_65.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42acef79c1717963875410486b767125718287d7c2e35b4fb52cfa8333688dcb -size 33060864 +oid sha256:c8487c524729c2fa5a16a7168be1be34453defa9d0d41dc6b133da17432f5229 +size 27525120 diff --git a/params_shard_66.bin b/params_shard_66.bin index 865d2fce2f6d028a8cfc453dd96ef4c841072892..dc49ec42b1df67a999a887d19ee756e72256c775 100644 --- a/params_shard_66.bin +++ b/params_shard_66.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acce6cbc88ef82cd23b55768fcbc109bf25054c0bda1b2d4f1044be0a76890bd -size 11013120 +oid sha256:9b4c15fb1a5359e14d779f6aa546567a118831704b2f92a2952419e8fb629479 +size 11023360 diff --git a/params_shard_7.bin b/params_shard_7.bin index 3eea2d44c86424164d0cb995a514fbbd91af8ff8..cc177c12d9fc31015dc95fa2a4103f5cf80affe6 100644 --- a/params_shard_7.bin +++ b/params_shard_7.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ae873ec03926afbf8cc00be8fcd427b7f9399bed2fb3b5e4f0d05f6ae7222a6 -size 27525120 +oid sha256:c53df5b558c83a4ff3b14d13f2956b6fd1d5e899a631a6426ceae3001c2614d9 +size 33060864 diff --git a/params_shard_8.bin b/params_shard_8.bin index c402a1169419f0877f49b337865a7d79ef4a76f9..d1586226fec5d62c7bd647912f2adce5807d24a5 100644 --- a/params_shard_8.bin +++ b/params_shard_8.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1446fe702572e22327d6688fa7e94717e3d06ce9555965d98f401b076cc4950 +oid sha256:f80639b6f9bde2a0e5749dc9de9e7154de3288065677e927121001f855487585 size 55050240 diff --git a/params_shard_9.bin b/params_shard_9.bin index 8f5cedfecf661d237d9ef8a70db2841dd2091f1a..abf18ffc7adf787f6895c1f3eab46f3ba1801de7 100644 --- a/params_shard_9.bin +++ b/params_shard_9.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2311497d5dd63458ffca2f50761cffbcd0c769d94d9ae6de9e20797f26df3b9d -size 33060864 +oid sha256:4f90f6da37a0fcf7f699ee789d7d9ca4d6e9013e4b8eae829ee474a11df7e235 +size 27525120 diff --git a/qwen-2-1.5b-q0f16-android.tar b/qwen-2-1.5b-q0f16-android.tar new file mode 100644 index 0000000000000000000000000000000000000000..39647efe2eddf6c88483fac11d4762437d4859b8 --- /dev/null +++ b/qwen-2-1.5b-q0f16-android.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbded9ab23595ae70338e8e382f35327e29fb3ca8a8268ba2511760f15d3b00b +size 332614 diff --git a/tokenizer_config.json b/tokenizer_config.json index ff55d7b9eb1384e5d4d7e75dc0f564c1a8833d6e..f4b55f917af273d0dc98b67ec249f6445dd385f5 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -28,9 +28,9 @@ }, "additional_special_tokens": ["<|im_start|>", "<|im_end|>"], "bos_token": null, - "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", "clean_up_tokenization_spaces": false, - "eos_token": "<|im_end|>", + "eos_token": "<|endoftext|>", "errors": "replace", "model_max_length": 32768, "pad_token": "<|endoftext|>",