Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- config.json +51 -0
- model.safetensors +3 -0
- quant_log.csv +449 -0
- quantize_config.json +21 -0
- special_tokens_map.json +17 -0
- tokenizer.json +3 -0
- tokenizer_config.json +196 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": true,
|
3 |
+
"_name_or_path": "/home/azureuser/models/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-32B/snapshots/3865e12a1eb7cbd641ab3f9dfc28c588c6b0c1e9/",
|
4 |
+
"architectures": [
|
5 |
+
"Qwen2ForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 151643,
|
9 |
+
"eos_token_id": 151643,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 5120,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 27648,
|
14 |
+
"max_position_embeddings": 131072,
|
15 |
+
"max_window_layers": 64,
|
16 |
+
"model_type": "qwen2",
|
17 |
+
"num_attention_heads": 40,
|
18 |
+
"num_hidden_layers": 64,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"quantization_config": {
|
21 |
+
"bits": 4,
|
22 |
+
"checkpoint_format": "gptq",
|
23 |
+
"desc_act": true,
|
24 |
+
"dynamic": null,
|
25 |
+
"group_size": 128,
|
26 |
+
"lm_head": false,
|
27 |
+
"meta": {
|
28 |
+
"damp_auto_increment": 0.0025,
|
29 |
+
"damp_percent": 0.01,
|
30 |
+
"mse": 0.0,
|
31 |
+
"quantizer": [
|
32 |
+
"gptqmodel:1.7.4"
|
33 |
+
],
|
34 |
+
"static_groups": false,
|
35 |
+
"true_sequential": true,
|
36 |
+
"uri": "https://github.com/modelcloud/gptqmodel"
|
37 |
+
},
|
38 |
+
"quant_method": "gptq",
|
39 |
+
"sym": true
|
40 |
+
},
|
41 |
+
"rms_norm_eps": 1e-05,
|
42 |
+
"rope_scaling": null,
|
43 |
+
"rope_theta": 1000000.0,
|
44 |
+
"sliding_window": null,
|
45 |
+
"tie_word_embeddings": false,
|
46 |
+
"torch_dtype": "bfloat16",
|
47 |
+
"transformers_version": "4.48.2",
|
48 |
+
"use_cache": true,
|
49 |
+
"use_sliding_window": false,
|
50 |
+
"vocab_size": 152064
|
51 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85b4d1782ad6ed1456d47cea60483c26f865f2ab28f94fc88acfa146f4980c8d
|
3 |
+
size 19343988016
|
quant_log.csv
ADDED
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
layer,module,loss,damp,time
|
2 |
+
0,self_attn.k_proj,0.10951,0.01000,3.883
|
3 |
+
0,self_attn.v_proj,0.03535,0.01000,1.440
|
4 |
+
0,self_attn.q_proj,0.31461,0.01000,1.488
|
5 |
+
0,self_attn.o_proj,1.93236,0.01000,2.823
|
6 |
+
0,mlp.up_proj,0.35166,0.01000,3.473
|
7 |
+
0,mlp.gate_proj,0.38004,0.01000,1.662
|
8 |
+
0,mlp.down_proj,0.40345,0.01000,11.717
|
9 |
+
1,self_attn.k_proj,0.01090,0.01000,3.768
|
10 |
+
1,self_attn.v_proj,0.00528,0.01000,1.429
|
11 |
+
1,self_attn.q_proj,0.04718,0.01000,1.482
|
12 |
+
1,self_attn.o_proj,0.02199,0.01000,2.792
|
13 |
+
1,mlp.up_proj,3.01068,0.01000,3.502
|
14 |
+
1,mlp.gate_proj,9.94816,0.01000,1.675
|
15 |
+
1,mlp.down_proj,0.11068,0.01000,11.446
|
16 |
+
2,self_attn.k_proj,0.04953,0.01000,3.729
|
17 |
+
2,self_attn.v_proj,0.01729,0.01000,1.390
|
18 |
+
2,self_attn.q_proj,0.14619,0.01000,1.444
|
19 |
+
2,self_attn.o_proj,0.14250,0.01000,2.868
|
20 |
+
2,mlp.up_proj,5.58938,0.01000,3.545
|
21 |
+
2,mlp.gate_proj,13.15975,0.01000,1.650
|
22 |
+
2,mlp.down_proj,0.49439,0.01000,11.435
|
23 |
+
3,self_attn.k_proj,0.23805,0.01000,3.756
|
24 |
+
3,self_attn.v_proj,0.07278,0.01000,1.389
|
25 |
+
3,self_attn.q_proj,0.68931,0.01000,1.432
|
26 |
+
3,self_attn.o_proj,0.35515,0.01000,2.812
|
27 |
+
3,mlp.up_proj,6.17924,0.01000,3.486
|
28 |
+
3,mlp.gate_proj,14.39462,0.01000,1.643
|
29 |
+
3,mlp.down_proj,1.04559,0.01000,11.421
|
30 |
+
4,self_attn.k_proj,0.21649,0.01000,3.701
|
31 |
+
4,self_attn.v_proj,0.10355,0.01000,1.398
|
32 |
+
4,self_attn.q_proj,0.66609,0.01000,1.422
|
33 |
+
4,self_attn.o_proj,0.82410,0.01000,2.849
|
34 |
+
4,mlp.up_proj,16.02560,0.01000,3.501
|
35 |
+
4,mlp.gate_proj,30.96535,0.01000,1.635
|
36 |
+
4,mlp.down_proj,74.19284,0.01000,11.527
|
37 |
+
5,self_attn.k_proj,0.78692,0.01000,3.721
|
38 |
+
5,self_attn.v_proj,0.44434,0.01000,1.411
|
39 |
+
5,self_attn.q_proj,2.68566,0.01000,1.494
|
40 |
+
5,self_attn.o_proj,0.83673,0.01000,2.828
|
41 |
+
5,mlp.up_proj,27.54133,0.01000,3.525
|
42 |
+
5,mlp.gate_proj,50.29523,0.01000,1.656
|
43 |
+
5,mlp.down_proj,83.59009,0.01000,11.547
|
44 |
+
6,self_attn.k_proj,0.81260,0.01000,3.718
|
45 |
+
6,self_attn.v_proj,0.59795,0.01000,1.415
|
46 |
+
6,self_attn.q_proj,2.90379,0.01000,1.447
|
47 |
+
6,self_attn.o_proj,0.70393,0.01000,2.820
|
48 |
+
6,mlp.up_proj,45.98330,0.01000,3.510
|
49 |
+
6,mlp.gate_proj,78.89032,0.01000,1.656
|
50 |
+
6,mlp.down_proj,8.37488,0.01000,11.502
|
51 |
+
7,self_attn.k_proj,1.13787,0.01000,3.736
|
52 |
+
7,self_attn.v_proj,0.93084,0.01000,1.376
|
53 |
+
7,self_attn.q_proj,4.12102,0.01000,1.432
|
54 |
+
7,self_attn.o_proj,0.97531,0.01000,2.848
|
55 |
+
7,mlp.up_proj,60.87785,0.01000,3.499
|
56 |
+
7,mlp.gate_proj,105.33450,0.01000,1.660
|
57 |
+
7,mlp.down_proj,3.62189,0.01000,11.500
|
58 |
+
8,self_attn.k_proj,1.43242,0.01000,3.783
|
59 |
+
8,self_attn.v_proj,0.86657,0.01000,1.427
|
60 |
+
8,self_attn.q_proj,5.06832,0.01000,1.469
|
61 |
+
8,self_attn.o_proj,0.84137,0.01000,2.858
|
62 |
+
8,mlp.up_proj,41.36477,0.01000,3.547
|
63 |
+
8,mlp.gate_proj,70.03442,0.01000,1.671
|
64 |
+
8,mlp.down_proj,4.84961,0.01000,11.525
|
65 |
+
9,self_attn.k_proj,1.36615,0.01000,3.767
|
66 |
+
9,self_attn.v_proj,1.13563,0.01000,1.410
|
67 |
+
9,self_attn.q_proj,4.80110,0.01000,1.445
|
68 |
+
9,self_attn.o_proj,1.52607,0.01000,2.839
|
69 |
+
9,mlp.up_proj,24.10745,0.01000,3.526
|
70 |
+
9,mlp.gate_proj,26.14586,0.01000,1.671
|
71 |
+
9,mlp.down_proj,6.05000,0.01000,11.515
|
72 |
+
10,self_attn.k_proj,2.16733,0.01000,3.736
|
73 |
+
10,self_attn.v_proj,1.76791,0.01000,1.432
|
74 |
+
10,self_attn.q_proj,8.01266,0.01000,1.444
|
75 |
+
10,self_attn.o_proj,1.76568,0.01000,2.872
|
76 |
+
10,mlp.up_proj,31.98140,0.01000,3.495
|
77 |
+
10,mlp.gate_proj,34.58262,0.01000,1.668
|
78 |
+
10,mlp.down_proj,10.15542,0.01000,11.435
|
79 |
+
11,self_attn.k_proj,1.54369,0.01000,3.728
|
80 |
+
11,self_attn.v_proj,1.15980,0.01000,1.390
|
81 |
+
11,self_attn.q_proj,5.73237,0.01000,1.448
|
82 |
+
11,self_attn.o_proj,2.40549,0.01000,2.815
|
83 |
+
11,mlp.up_proj,40.48055,0.01000,3.461
|
84 |
+
11,mlp.gate_proj,48.89324,0.01000,1.635
|
85 |
+
11,mlp.down_proj,8.39603,0.01000,11.495
|
86 |
+
12,self_attn.k_proj,1.99017,0.01000,3.780
|
87 |
+
12,self_attn.v_proj,1.44571,0.01000,1.432
|
88 |
+
12,self_attn.q_proj,7.47077,0.01000,1.481
|
89 |
+
12,self_attn.o_proj,2.56372,0.01000,2.848
|
90 |
+
12,mlp.up_proj,43.65084,0.01000,3.515
|
91 |
+
12,mlp.gate_proj,46.83010,0.01000,1.656
|
92 |
+
12,mlp.down_proj,10.60977,0.01000,11.584
|
93 |
+
13,self_attn.k_proj,2.29766,0.01000,3.717
|
94 |
+
13,self_attn.v_proj,1.78979,0.01000,1.413
|
95 |
+
13,self_attn.q_proj,8.21005,0.01000,1.425
|
96 |
+
13,self_attn.o_proj,3.02680,0.01000,2.830
|
97 |
+
13,mlp.up_proj,51.54390,0.01000,3.498
|
98 |
+
13,mlp.gate_proj,55.83699,0.01000,1.652
|
99 |
+
13,mlp.down_proj,12.55419,0.01000,11.524
|
100 |
+
14,self_attn.k_proj,3.06420,0.01000,3.753
|
101 |
+
14,self_attn.v_proj,2.15402,0.01000,1.415
|
102 |
+
14,self_attn.q_proj,11.00309,0.01000,1.464
|
103 |
+
14,self_attn.o_proj,2.54914,0.01000,2.832
|
104 |
+
14,mlp.up_proj,55.35691,0.01000,3.478
|
105 |
+
14,mlp.gate_proj,60.52634,0.01000,1.673
|
106 |
+
14,mlp.down_proj,13.15211,0.01000,11.478
|
107 |
+
15,self_attn.k_proj,2.78606,0.01000,3.673
|
108 |
+
15,self_attn.v_proj,2.29066,0.01000,1.389
|
109 |
+
15,self_attn.q_proj,9.91676,0.01000,1.426
|
110 |
+
15,self_attn.o_proj,3.46237,0.01000,2.848
|
111 |
+
15,mlp.up_proj,57.15676,0.01000,3.484
|
112 |
+
15,mlp.gate_proj,64.80304,0.01000,1.667
|
113 |
+
15,mlp.down_proj,13.22720,0.01000,11.553
|
114 |
+
16,self_attn.k_proj,2.32143,0.01000,3.730
|
115 |
+
16,self_attn.v_proj,1.53318,0.01000,1.404
|
116 |
+
16,self_attn.q_proj,7.89914,0.01000,1.456
|
117 |
+
16,self_attn.o_proj,2.69978,0.01000,2.815
|
118 |
+
16,mlp.up_proj,52.69391,0.01000,3.476
|
119 |
+
16,mlp.gate_proj,56.90802,0.01000,1.666
|
120 |
+
16,mlp.down_proj,11.50479,0.01000,11.536
|
121 |
+
17,self_attn.k_proj,3.04381,0.01000,3.716
|
122 |
+
17,self_attn.v_proj,1.88760,0.01000,1.410
|
123 |
+
17,self_attn.q_proj,10.43633,0.01000,1.445
|
124 |
+
17,self_attn.o_proj,2.43443,0.01000,2.831
|
125 |
+
17,mlp.up_proj,50.52463,0.01000,3.463
|
126 |
+
17,mlp.gate_proj,53.69043,0.01000,1.670
|
127 |
+
17,mlp.down_proj,10.58508,0.01000,11.530
|
128 |
+
18,self_attn.k_proj,2.94937,0.01000,3.698
|
129 |
+
18,self_attn.v_proj,1.88430,0.01000,1.411
|
130 |
+
18,self_attn.q_proj,10.11764,0.01000,1.456
|
131 |
+
18,self_attn.o_proj,3.01183,0.01000,2.818
|
132 |
+
18,mlp.up_proj,48.77782,0.01000,3.487
|
133 |
+
18,mlp.gate_proj,51.53640,0.01000,1.656
|
134 |
+
18,mlp.down_proj,10.17783,0.01000,11.521
|
135 |
+
19,self_attn.k_proj,2.73611,0.01000,3.740
|
136 |
+
19,self_attn.v_proj,1.91514,0.01000,1.397
|
137 |
+
19,self_attn.q_proj,9.88004,0.01000,1.451
|
138 |
+
19,self_attn.o_proj,1.94948,0.01000,2.824
|
139 |
+
19,mlp.up_proj,48.76656,0.01000,3.477
|
140 |
+
19,mlp.gate_proj,51.39761,0.01000,1.674
|
141 |
+
19,mlp.down_proj,9.98934,0.01000,11.471
|
142 |
+
20,self_attn.k_proj,3.05076,0.01000,3.699
|
143 |
+
20,self_attn.v_proj,1.80857,0.01000,1.409
|
144 |
+
20,self_attn.q_proj,10.45554,0.01000,1.441
|
145 |
+
20,self_attn.o_proj,3.12608,0.01000,2.834
|
146 |
+
20,mlp.up_proj,46.56360,0.01000,3.483
|
147 |
+
20,mlp.gate_proj,48.38595,0.01000,1.660
|
148 |
+
20,mlp.down_proj,10.36077,0.01000,11.453
|
149 |
+
21,self_attn.k_proj,2.67004,0.01000,3.729
|
150 |
+
21,self_attn.v_proj,1.66243,0.01000,1.401
|
151 |
+
21,self_attn.q_proj,9.23527,0.01000,1.444
|
152 |
+
21,self_attn.o_proj,3.68392,0.01000,2.846
|
153 |
+
21,mlp.up_proj,45.87252,0.01000,3.485
|
154 |
+
21,mlp.gate_proj,47.80573,0.01000,1.672
|
155 |
+
21,mlp.down_proj,10.36233,0.01000,11.383
|
156 |
+
22,self_attn.k_proj,2.91415,0.01000,3.718
|
157 |
+
22,self_attn.v_proj,2.36053,0.01000,1.429
|
158 |
+
22,self_attn.q_proj,10.42156,0.01000,1.435
|
159 |
+
22,self_attn.o_proj,5.89517,0.01000,2.815
|
160 |
+
22,mlp.up_proj,47.82814,0.01000,3.499
|
161 |
+
22,mlp.gate_proj,49.75899,0.01000,1.650
|
162 |
+
22,mlp.down_proj,11.80908,0.01000,11.540
|
163 |
+
23,self_attn.k_proj,2.88734,0.01000,3.718
|
164 |
+
23,self_attn.v_proj,2.46526,0.01000,1.400
|
165 |
+
23,self_attn.q_proj,10.54750,0.01000,1.428
|
166 |
+
23,self_attn.o_proj,5.54833,0.01000,2.857
|
167 |
+
23,mlp.up_proj,49.94622,0.01000,3.488
|
168 |
+
23,mlp.gate_proj,52.72432,0.01000,1.650
|
169 |
+
23,mlp.down_proj,12.38029,0.01000,11.413
|
170 |
+
24,self_attn.k_proj,3.59931,0.01000,3.717
|
171 |
+
24,self_attn.v_proj,2.48914,0.01000,1.390
|
172 |
+
24,self_attn.q_proj,12.58873,0.01000,1.439
|
173 |
+
24,self_attn.o_proj,4.56547,0.01000,2.834
|
174 |
+
24,mlp.up_proj,49.69126,0.01000,3.512
|
175 |
+
24,mlp.gate_proj,51.73148,0.01000,1.640
|
176 |
+
24,mlp.down_proj,11.49405,0.01000,11.428
|
177 |
+
25,self_attn.k_proj,3.87327,0.01000,3.715
|
178 |
+
25,self_attn.v_proj,2.99191,0.01000,1.393
|
179 |
+
25,self_attn.q_proj,14.13076,0.01000,1.426
|
180 |
+
25,self_attn.o_proj,2.97217,0.01000,2.831
|
181 |
+
25,mlp.up_proj,49.42391,0.01000,3.489
|
182 |
+
25,mlp.gate_proj,50.69650,0.01000,1.653
|
183 |
+
25,mlp.down_proj,11.35342,0.01000,11.421
|
184 |
+
26,self_attn.k_proj,3.37936,0.01000,3.719
|
185 |
+
26,self_attn.v_proj,1.98034,0.01000,1.395
|
186 |
+
26,self_attn.q_proj,12.16627,0.01000,1.464
|
187 |
+
26,self_attn.o_proj,4.32040,0.01000,2.798
|
188 |
+
26,mlp.up_proj,50.47544,0.01000,3.447
|
189 |
+
26,mlp.gate_proj,51.33285,0.01000,1.642
|
190 |
+
26,mlp.down_proj,12.01545,0.01000,11.393
|
191 |
+
27,self_attn.k_proj,3.24756,0.01000,3.686
|
192 |
+
27,self_attn.v_proj,2.04866,0.01000,1.387
|
193 |
+
27,self_attn.q_proj,10.93604,0.01000,1.429
|
194 |
+
27,self_attn.o_proj,6.03632,0.01000,2.781
|
195 |
+
27,mlp.up_proj,51.98524,0.01000,3.452
|
196 |
+
27,mlp.gate_proj,52.96064,0.01000,1.651
|
197 |
+
27,mlp.down_proj,13.92403,0.01000,11.437
|
198 |
+
28,self_attn.k_proj,3.10746,0.01000,3.723
|
199 |
+
28,self_attn.v_proj,3.03016,0.01000,1.399
|
200 |
+
28,self_attn.q_proj,11.74794,0.01000,1.427
|
201 |
+
28,self_attn.o_proj,7.86297,0.01000,2.863
|
202 |
+
28,mlp.up_proj,56.70300,0.01000,3.516
|
203 |
+
28,mlp.gate_proj,57.31458,0.01000,1.636
|
204 |
+
28,mlp.down_proj,16.08722,0.01000,11.465
|
205 |
+
29,self_attn.k_proj,4.52262,0.01000,3.758
|
206 |
+
29,self_attn.v_proj,3.68450,0.01000,1.391
|
207 |
+
29,self_attn.q_proj,15.77346,0.01000,1.449
|
208 |
+
29,self_attn.o_proj,6.53687,0.01000,2.846
|
209 |
+
29,mlp.up_proj,60.68779,0.01000,3.494
|
210 |
+
29,mlp.gate_proj,60.92637,0.01000,1.644
|
211 |
+
29,mlp.down_proj,16.53972,0.01000,11.431
|
212 |
+
30,self_attn.k_proj,3.84173,0.01000,3.729
|
213 |
+
30,self_attn.v_proj,3.77917,0.01000,1.394
|
214 |
+
30,self_attn.q_proj,14.39547,0.01000,1.442
|
215 |
+
30,self_attn.o_proj,9.98629,0.01000,2.841
|
216 |
+
30,mlp.up_proj,65.12813,0.01000,3.505
|
217 |
+
30,mlp.gate_proj,65.36819,0.01000,1.684
|
218 |
+
30,mlp.down_proj,17.68388,0.01000,11.413
|
219 |
+
31,self_attn.k_proj,4.12658,0.01000,3.739
|
220 |
+
31,self_attn.v_proj,3.79275,0.01000,1.435
|
221 |
+
31,self_attn.q_proj,15.06434,0.01000,1.427
|
222 |
+
31,self_attn.o_proj,8.47146,0.01000,2.846
|
223 |
+
31,mlp.up_proj,71.08771,0.01000,3.491
|
224 |
+
31,mlp.gate_proj,71.44981,0.01000,1.654
|
225 |
+
31,mlp.down_proj,18.88679,0.01000,11.388
|
226 |
+
32,self_attn.k_proj,3.81222,0.01000,3.727
|
227 |
+
32,self_attn.v_proj,2.76792,0.01000,1.406
|
228 |
+
32,self_attn.q_proj,13.15588,0.01000,1.434
|
229 |
+
32,self_attn.o_proj,8.52706,0.01000,2.829
|
230 |
+
32,mlp.up_proj,80.01401,0.01000,3.501
|
231 |
+
32,mlp.gate_proj,83.70457,0.01000,1.645
|
232 |
+
32,mlp.down_proj,22.14128,0.01000,11.399
|
233 |
+
33,self_attn.k_proj,4.10112,0.01000,3.714
|
234 |
+
33,self_attn.v_proj,3.03966,0.01000,1.389
|
235 |
+
33,self_attn.q_proj,14.94937,0.01000,1.454
|
236 |
+
33,self_attn.o_proj,8.60124,0.01000,2.811
|
237 |
+
33,mlp.up_proj,74.89600,0.01000,3.459
|
238 |
+
33,mlp.gate_proj,76.67551,0.01000,1.662
|
239 |
+
33,mlp.down_proj,20.84193,0.01000,11.461
|
240 |
+
34,self_attn.k_proj,3.91466,0.01000,3.769
|
241 |
+
34,self_attn.v_proj,3.28405,0.01000,1.420
|
242 |
+
34,self_attn.q_proj,14.49163,0.01000,1.474
|
243 |
+
34,self_attn.o_proj,11.78429,0.01000,2.807
|
244 |
+
34,mlp.up_proj,73.45476,0.01000,3.468
|
245 |
+
34,mlp.gate_proj,73.74457,0.01000,1.658
|
246 |
+
34,mlp.down_proj,20.24182,0.01000,11.424
|
247 |
+
35,self_attn.k_proj,3.98005,0.01000,3.667
|
248 |
+
35,self_attn.v_proj,3.12848,0.01000,1.390
|
249 |
+
35,self_attn.q_proj,16.17864,0.01000,1.437
|
250 |
+
35,self_attn.o_proj,6.41689,0.01000,2.804
|
251 |
+
35,mlp.up_proj,72.85312,0.01000,3.490
|
252 |
+
35,mlp.gate_proj,72.84827,0.01000,1.652
|
253 |
+
35,mlp.down_proj,19.52649,0.01000,11.360
|
254 |
+
36,self_attn.k_proj,4.11925,0.01000,3.725
|
255 |
+
36,self_attn.v_proj,3.06211,0.01000,1.397
|
256 |
+
36,self_attn.q_proj,15.48971,0.01000,1.421
|
257 |
+
36,self_attn.o_proj,9.70860,0.01000,2.834
|
258 |
+
36,mlp.up_proj,67.59813,0.01000,3.531
|
259 |
+
36,mlp.gate_proj,65.03483,0.01000,1.642
|
260 |
+
36,mlp.down_proj,19.41002,0.01000,11.453
|
261 |
+
37,self_attn.k_proj,3.61643,0.01000,3.711
|
262 |
+
37,self_attn.v_proj,2.82563,0.01000,1.392
|
263 |
+
37,self_attn.q_proj,13.50569,0.01000,1.442
|
264 |
+
37,self_attn.o_proj,8.54290,0.01000,2.839
|
265 |
+
37,mlp.up_proj,66.48640,0.01000,3.494
|
266 |
+
37,mlp.gate_proj,63.73344,0.01000,1.642
|
267 |
+
37,mlp.down_proj,18.53870,0.01000,11.432
|
268 |
+
38,self_attn.k_proj,4.05270,0.01000,3.732
|
269 |
+
38,self_attn.v_proj,4.14549,0.01000,1.390
|
270 |
+
38,self_attn.q_proj,14.87232,0.01000,1.457
|
271 |
+
38,self_attn.o_proj,11.18412,0.01000,2.809
|
272 |
+
38,mlp.up_proj,67.93635,0.01000,3.582
|
273 |
+
38,mlp.gate_proj,65.45750,0.01000,1.757
|
274 |
+
38,mlp.down_proj,19.42718,0.01000,11.550
|
275 |
+
39,self_attn.k_proj,3.82456,0.01000,3.721
|
276 |
+
39,self_attn.v_proj,4.29623,0.01000,1.421
|
277 |
+
39,self_attn.q_proj,15.28368,0.01000,1.503
|
278 |
+
39,self_attn.o_proj,11.31686,0.01000,2.898
|
279 |
+
39,mlp.up_proj,69.10301,0.01000,3.888
|
280 |
+
39,mlp.gate_proj,69.18791,0.01000,2.068
|
281 |
+
39,mlp.down_proj,19.87582,0.01000,11.851
|
282 |
+
40,self_attn.k_proj,4.73092,0.01000,3.738
|
283 |
+
40,self_attn.v_proj,3.68610,0.01000,1.407
|
284 |
+
40,self_attn.q_proj,16.76764,0.01000,1.504
|
285 |
+
40,self_attn.o_proj,10.72335,0.01000,2.913
|
286 |
+
40,mlp.up_proj,66.13728,0.01000,3.879
|
287 |
+
40,mlp.gate_proj,65.15390,0.01000,2.051
|
288 |
+
40,mlp.down_proj,17.66138,0.01000,11.917
|
289 |
+
41,self_attn.k_proj,4.53810,0.01000,3.757
|
290 |
+
41,self_attn.v_proj,4.69571,0.01000,1.412
|
291 |
+
41,self_attn.q_proj,17.62110,0.01000,1.523
|
292 |
+
41,self_attn.o_proj,7.87525,0.01000,2.914
|
293 |
+
41,mlp.up_proj,65.88686,0.01000,3.911
|
294 |
+
41,mlp.gate_proj,63.16815,0.01000,2.050
|
295 |
+
41,mlp.down_proj,17.90001,0.01000,11.953
|
296 |
+
42,self_attn.k_proj,4.11581,0.01000,3.760
|
297 |
+
42,self_attn.v_proj,2.94913,0.01000,1.401
|
298 |
+
42,self_attn.q_proj,15.66399,0.01000,1.513
|
299 |
+
42,self_attn.o_proj,7.23498,0.01000,2.907
|
300 |
+
42,mlp.up_proj,69.44088,0.01000,3.894
|
301 |
+
42,mlp.gate_proj,65.32219,0.01000,2.091
|
302 |
+
42,mlp.down_proj,19.95204,0.01000,11.885
|
303 |
+
43,self_attn.k_proj,3.98492,0.01000,3.750
|
304 |
+
43,self_attn.v_proj,3.43731,0.01000,1.407
|
305 |
+
43,self_attn.q_proj,14.57642,0.01000,1.514
|
306 |
+
43,self_attn.o_proj,8.84358,0.01000,2.904
|
307 |
+
43,mlp.up_proj,71.93735,0.01000,3.909
|
308 |
+
43,mlp.gate_proj,67.84066,0.01000,2.065
|
309 |
+
43,mlp.down_proj,23.77357,0.01000,11.793
|
310 |
+
44,self_attn.k_proj,3.42093,0.01000,3.720
|
311 |
+
44,self_attn.v_proj,4.85997,0.01000,1.391
|
312 |
+
44,self_attn.q_proj,14.46185,0.01000,1.512
|
313 |
+
44,self_attn.o_proj,12.07453,0.01000,2.843
|
314 |
+
44,mlp.up_proj,75.22107,0.01000,3.832
|
315 |
+
44,mlp.gate_proj,70.81406,0.01000,2.011
|
316 |
+
44,mlp.down_proj,25.91680,0.01000,11.885
|
317 |
+
45,self_attn.k_proj,4.28960,0.01000,3.739
|
318 |
+
45,self_attn.v_proj,5.02471,0.01000,1.391
|
319 |
+
45,self_attn.q_proj,16.49854,0.01000,1.494
|
320 |
+
45,self_attn.o_proj,10.15552,0.01000,2.883
|
321 |
+
45,mlp.up_proj,76.72332,0.01000,3.832
|
322 |
+
45,mlp.gate_proj,72.49304,0.01000,2.036
|
323 |
+
45,mlp.down_proj,27.25921,0.01000,11.788
|
324 |
+
46,self_attn.k_proj,3.65442,0.01000,3.709
|
325 |
+
46,self_attn.v_proj,5.51480,0.01000,1.432
|
326 |
+
46,self_attn.q_proj,15.29177,0.01000,1.488
|
327 |
+
46,self_attn.o_proj,14.40157,0.01000,2.891
|
328 |
+
46,mlp.up_proj,79.03833,0.01000,3.877
|
329 |
+
46,mlp.gate_proj,75.19668,0.01000,2.045
|
330 |
+
46,mlp.down_proj,29.75313,0.01000,11.788
|
331 |
+
47,self_attn.k_proj,3.74196,0.01000,3.711
|
332 |
+
47,self_attn.v_proj,4.80036,0.01000,1.384
|
333 |
+
47,self_attn.q_proj,15.31683,0.01000,1.500
|
334 |
+
47,self_attn.o_proj,10.98499,0.01000,2.897
|
335 |
+
47,mlp.up_proj,86.83043,0.01000,3.877
|
336 |
+
47,mlp.gate_proj,83.43273,0.01000,2.016
|
337 |
+
47,mlp.down_proj,35.28318,0.01000,11.780
|
338 |
+
48,self_attn.k_proj,4.09442,0.01000,3.712
|
339 |
+
48,self_attn.v_proj,6.84564,0.01000,1.391
|
340 |
+
48,self_attn.q_proj,16.91884,0.01000,1.507
|
341 |
+
48,self_attn.o_proj,11.48125,0.01000,2.877
|
342 |
+
48,mlp.up_proj,94.18584,0.01000,3.882
|
343 |
+
48,mlp.gate_proj,91.58864,0.01000,2.023
|
344 |
+
48,mlp.down_proj,41.43192,0.01000,11.792
|
345 |
+
49,self_attn.k_proj,4.14622,0.01000,3.721
|
346 |
+
49,self_attn.v_proj,6.16909,0.01000,1.385
|
347 |
+
49,self_attn.q_proj,17.88456,0.01000,1.511
|
348 |
+
49,self_attn.o_proj,12.20423,0.01000,2.897
|
349 |
+
49,mlp.up_proj,110.47681,0.01000,3.879
|
350 |
+
49,mlp.gate_proj,108.09877,0.01000,2.022
|
351 |
+
49,mlp.down_proj,60.00366,0.01000,11.821
|
352 |
+
50,self_attn.k_proj,4.31881,0.01000,3.676
|
353 |
+
50,self_attn.v_proj,7.56463,0.01000,1.412
|
354 |
+
50,self_attn.q_proj,19.33999,0.01000,1.521
|
355 |
+
50,self_attn.o_proj,11.80367,0.01000,2.872
|
356 |
+
50,mlp.up_proj,126.76958,0.01000,3.870
|
357 |
+
50,mlp.gate_proj,127.11801,0.01000,2.046
|
358 |
+
50,mlp.down_proj,71.79101,0.01000,11.783
|
359 |
+
51,self_attn.k_proj,4.37520,0.01000,3.744
|
360 |
+
51,self_attn.v_proj,6.71142,0.01000,1.412
|
361 |
+
51,self_attn.q_proj,18.20211,0.01000,1.521
|
362 |
+
51,self_attn.o_proj,18.99162,0.01000,2.883
|
363 |
+
51,mlp.up_proj,146.28067,0.01000,3.862
|
364 |
+
51,mlp.gate_proj,148.43158,0.01000,2.021
|
365 |
+
51,mlp.down_proj,89.34132,0.01000,11.793
|
366 |
+
52,self_attn.k_proj,5.08537,0.01000,3.710
|
367 |
+
52,self_attn.v_proj,11.85403,0.01000,1.396
|
368 |
+
52,self_attn.q_proj,22.21476,0.01000,1.513
|
369 |
+
52,self_attn.o_proj,18.47747,0.01000,2.896
|
370 |
+
52,mlp.up_proj,160.65359,0.01000,3.853
|
371 |
+
52,mlp.gate_proj,161.82695,0.01000,2.014
|
372 |
+
52,mlp.down_proj,108.08738,0.01000,11.832
|
373 |
+
53,self_attn.k_proj,5.93244,0.01000,3.760
|
374 |
+
53,self_attn.v_proj,11.73937,0.01000,1.413
|
375 |
+
53,self_attn.q_proj,24.38095,0.01000,1.508
|
376 |
+
53,self_attn.o_proj,17.68197,0.01000,2.937
|
377 |
+
53,mlp.up_proj,180.51447,0.01000,3.881
|
378 |
+
53,mlp.gate_proj,181.77620,0.01000,2.017
|
379 |
+
53,mlp.down_proj,121.80530,0.01000,11.752
|
380 |
+
54,self_attn.k_proj,5.59463,0.01000,3.721
|
381 |
+
54,self_attn.v_proj,10.78246,0.01000,1.383
|
382 |
+
54,self_attn.q_proj,23.04250,0.01000,1.499
|
383 |
+
54,self_attn.o_proj,16.19074,0.01000,2.904
|
384 |
+
54,mlp.up_proj,199.69888,0.01000,3.845
|
385 |
+
54,mlp.gate_proj,198.83350,0.01000,2.035
|
386 |
+
54,mlp.down_proj,135.99011,0.01000,11.733
|
387 |
+
55,self_attn.k_proj,5.40018,0.01000,3.705
|
388 |
+
55,self_attn.v_proj,11.41265,0.01000,1.411
|
389 |
+
55,self_attn.q_proj,24.45917,0.01000,1.498
|
390 |
+
55,self_attn.o_proj,20.51224,0.01000,2.869
|
391 |
+
55,mlp.up_proj,223.49593,0.01000,3.870
|
392 |
+
55,mlp.gate_proj,219.73848,0.01000,2.015
|
393 |
+
55,mlp.down_proj,161.11020,0.01000,11.837
|
394 |
+
56,self_attn.k_proj,6.33442,0.01000,3.718
|
395 |
+
56,self_attn.v_proj,16.18852,0.01000,1.388
|
396 |
+
56,self_attn.q_proj,27.11760,0.01000,1.506
|
397 |
+
56,self_attn.o_proj,22.74030,0.01000,2.901
|
398 |
+
56,mlp.up_proj,249.24750,0.01000,3.881
|
399 |
+
56,mlp.gate_proj,243.24731,0.01000,2.042
|
400 |
+
56,mlp.down_proj,186.52641,0.01000,11.763
|
401 |
+
57,self_attn.k_proj,6.23077,0.01000,3.759
|
402 |
+
57,self_attn.v_proj,16.85532,0.01000,1.394
|
403 |
+
57,self_attn.q_proj,29.05512,0.01000,1.517
|
404 |
+
57,self_attn.o_proj,16.64697,0.01000,2.883
|
405 |
+
57,mlp.up_proj,274.13293,0.01000,3.880
|
406 |
+
57,mlp.gate_proj,263.13092,0.01000,2.032
|
407 |
+
57,mlp.down_proj,210.55884,0.01000,11.743
|
408 |
+
58,self_attn.k_proj,6.47978,0.01000,3.707
|
409 |
+
58,self_attn.v_proj,19.14422,0.01000,1.445
|
410 |
+
58,self_attn.q_proj,27.92134,0.01000,1.511
|
411 |
+
58,self_attn.o_proj,22.04212,0.01000,2.907
|
412 |
+
58,mlp.up_proj,300.86035,0.01000,3.856
|
413 |
+
58,mlp.gate_proj,284.91528,0.01000,2.011
|
414 |
+
58,mlp.down_proj,253.12970,0.01000,11.804
|
415 |
+
59,self_attn.k_proj,7.25131,0.01000,3.754
|
416 |
+
59,self_attn.v_proj,26.22610,0.01000,1.394
|
417 |
+
59,self_attn.q_proj,33.76220,0.01000,1.536
|
418 |
+
59,self_attn.o_proj,39.83875,0.01000,2.878
|
419 |
+
59,mlp.up_proj,334.85654,0.01000,3.752
|
420 |
+
59,mlp.gate_proj,311.49915,0.01000,1.804
|
421 |
+
59,mlp.down_proj,304.22244,0.01000,11.518
|
422 |
+
60,self_attn.k_proj,6.24524,0.01000,3.712
|
423 |
+
60,self_attn.v_proj,30.27362,0.01000,1.394
|
424 |
+
60,self_attn.q_proj,32.80447,0.01000,1.518
|
425 |
+
60,self_attn.o_proj,58.39975,0.01000,2.885
|
426 |
+
60,mlp.up_proj,372.51617,0.01000,3.867
|
427 |
+
60,mlp.gate_proj,340.40439,0.01000,2.023
|
428 |
+
60,mlp.down_proj,413.50845,0.01000,11.941
|
429 |
+
61,self_attn.k_proj,7.28852,0.01000,3.762
|
430 |
+
61,self_attn.v_proj,40.24563,0.01000,1.401
|
431 |
+
61,self_attn.q_proj,36.65611,0.01000,1.522
|
432 |
+
61,self_attn.o_proj,70.62356,0.01000,2.939
|
433 |
+
61,mlp.up_proj,423.02716,0.01000,3.997
|
434 |
+
61,mlp.gate_proj,388.64325,0.01000,2.048
|
435 |
+
61,mlp.down_proj,529.49036,0.01000,11.913
|
436 |
+
62,self_attn.k_proj,6.85958,0.01000,3.763
|
437 |
+
62,self_attn.v_proj,45.74522,0.01000,1.419
|
438 |
+
62,self_attn.q_proj,35.89644,0.01000,1.527
|
439 |
+
62,self_attn.o_proj,147.87845,0.01000,2.912
|
440 |
+
62,mlp.up_proj,437.13004,0.01000,3.899
|
441 |
+
62,mlp.gate_proj,411.97052,0.01000,1.726
|
442 |
+
62,mlp.down_proj,1003.17969,0.01000,11.636
|
443 |
+
63,self_attn.k_proj,6.30572,0.01000,3.771
|
444 |
+
63,self_attn.v_proj,32.41533,0.01000,1.398
|
445 |
+
63,self_attn.q_proj,27.82413,0.01000,1.497
|
446 |
+
63,self_attn.o_proj,68.36284,0.01000,2.911
|
447 |
+
63,mlp.up_proj,530.98322,0.01000,4.016
|
448 |
+
63,mlp.gate_proj,512.05829,0.01000,2.076
|
449 |
+
63,mlp.down_proj,2032.59351,0.01000,12.026
|
quantize_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 4,
|
3 |
+
"dynamic": null,
|
4 |
+
"group_size": 128,
|
5 |
+
"desc_act": true,
|
6 |
+
"sym": true,
|
7 |
+
"lm_head": false,
|
8 |
+
"quant_method": "gptq",
|
9 |
+
"checkpoint_format": "gptq",
|
10 |
+
"meta": {
|
11 |
+
"quantizer": [
|
12 |
+
"gptqmodel:1.7.4"
|
13 |
+
],
|
14 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
15 |
+
"damp_percent": 0.01,
|
16 |
+
"damp_auto_increment": 0.0025,
|
17 |
+
"static_groups": false,
|
18 |
+
"true_sequential": true,
|
19 |
+
"mse": 0.0
|
20 |
+
}
|
21 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|begin▁of▁sentence|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|end▁of▁sentence|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<unk>"
|
17 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
|
3 |
+
size 11422778
|
tokenizer_config.json
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
+
"added_tokens_decoder": {
|
6 |
+
"151643": {
|
7 |
+
"content": "<|end▁of▁sentence|>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false,
|
12 |
+
"special": true
|
13 |
+
},
|
14 |
+
"151644": {
|
15 |
+
"content": "<|User|>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false,
|
20 |
+
"special": false
|
21 |
+
},
|
22 |
+
"151645": {
|
23 |
+
"content": "<|Assistant|>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": false,
|
27 |
+
"single_word": false,
|
28 |
+
"special": false
|
29 |
+
},
|
30 |
+
"151646": {
|
31 |
+
"content": "<|begin▁of▁sentence|>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false,
|
36 |
+
"special": true
|
37 |
+
},
|
38 |
+
"151647": {
|
39 |
+
"content": "<|EOT|>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false,
|
44 |
+
"special": false
|
45 |
+
},
|
46 |
+
"151648": {
|
47 |
+
"content": "<think>",
|
48 |
+
"lstrip": false,
|
49 |
+
"normalized": false,
|
50 |
+
"rstrip": false,
|
51 |
+
"single_word": false,
|
52 |
+
"special": false
|
53 |
+
},
|
54 |
+
"151649": {
|
55 |
+
"content": "</think>",
|
56 |
+
"lstrip": false,
|
57 |
+
"normalized": false,
|
58 |
+
"rstrip": false,
|
59 |
+
"single_word": false,
|
60 |
+
"special": false
|
61 |
+
},
|
62 |
+
"151650": {
|
63 |
+
"content": "<|quad_start|>",
|
64 |
+
"lstrip": false,
|
65 |
+
"normalized": false,
|
66 |
+
"rstrip": false,
|
67 |
+
"single_word": false,
|
68 |
+
"special": true
|
69 |
+
},
|
70 |
+
"151651": {
|
71 |
+
"content": "<|quad_end|>",
|
72 |
+
"lstrip": false,
|
73 |
+
"normalized": false,
|
74 |
+
"rstrip": false,
|
75 |
+
"single_word": false,
|
76 |
+
"special": true
|
77 |
+
},
|
78 |
+
"151652": {
|
79 |
+
"content": "<|vision_start|>",
|
80 |
+
"lstrip": false,
|
81 |
+
"normalized": false,
|
82 |
+
"rstrip": false,
|
83 |
+
"single_word": false,
|
84 |
+
"special": true
|
85 |
+
},
|
86 |
+
"151653": {
|
87 |
+
"content": "<|vision_end|>",
|
88 |
+
"lstrip": false,
|
89 |
+
"normalized": false,
|
90 |
+
"rstrip": false,
|
91 |
+
"single_word": false,
|
92 |
+
"special": true
|
93 |
+
},
|
94 |
+
"151654": {
|
95 |
+
"content": "<|vision_pad|>",
|
96 |
+
"lstrip": false,
|
97 |
+
"normalized": false,
|
98 |
+
"rstrip": false,
|
99 |
+
"single_word": false,
|
100 |
+
"special": true
|
101 |
+
},
|
102 |
+
"151655": {
|
103 |
+
"content": "<|image_pad|>",
|
104 |
+
"lstrip": false,
|
105 |
+
"normalized": false,
|
106 |
+
"rstrip": false,
|
107 |
+
"single_word": false,
|
108 |
+
"special": true
|
109 |
+
},
|
110 |
+
"151656": {
|
111 |
+
"content": "<|video_pad|>",
|
112 |
+
"lstrip": false,
|
113 |
+
"normalized": false,
|
114 |
+
"rstrip": false,
|
115 |
+
"single_word": false,
|
116 |
+
"special": true
|
117 |
+
},
|
118 |
+
"151657": {
|
119 |
+
"content": "<tool_call>",
|
120 |
+
"lstrip": false,
|
121 |
+
"normalized": false,
|
122 |
+
"rstrip": false,
|
123 |
+
"single_word": false,
|
124 |
+
"special": false
|
125 |
+
},
|
126 |
+
"151658": {
|
127 |
+
"content": "</tool_call>",
|
128 |
+
"lstrip": false,
|
129 |
+
"normalized": false,
|
130 |
+
"rstrip": false,
|
131 |
+
"single_word": false,
|
132 |
+
"special": false
|
133 |
+
},
|
134 |
+
"151659": {
|
135 |
+
"content": "<|fim_prefix|>",
|
136 |
+
"lstrip": false,
|
137 |
+
"normalized": false,
|
138 |
+
"rstrip": false,
|
139 |
+
"single_word": false,
|
140 |
+
"special": false
|
141 |
+
},
|
142 |
+
"151660": {
|
143 |
+
"content": "<|fim_middle|>",
|
144 |
+
"lstrip": false,
|
145 |
+
"normalized": false,
|
146 |
+
"rstrip": false,
|
147 |
+
"single_word": false,
|
148 |
+
"special": false
|
149 |
+
},
|
150 |
+
"151661": {
|
151 |
+
"content": "<|fim_suffix|>",
|
152 |
+
"lstrip": false,
|
153 |
+
"normalized": false,
|
154 |
+
"rstrip": false,
|
155 |
+
"single_word": false,
|
156 |
+
"special": false
|
157 |
+
},
|
158 |
+
"151662": {
|
159 |
+
"content": "<|fim_pad|>",
|
160 |
+
"lstrip": false,
|
161 |
+
"normalized": false,
|
162 |
+
"rstrip": false,
|
163 |
+
"single_word": false,
|
164 |
+
"special": false
|
165 |
+
},
|
166 |
+
"151663": {
|
167 |
+
"content": "<|repo_name|>",
|
168 |
+
"lstrip": false,
|
169 |
+
"normalized": false,
|
170 |
+
"rstrip": false,
|
171 |
+
"single_word": false,
|
172 |
+
"special": false
|
173 |
+
},
|
174 |
+
"151664": {
|
175 |
+
"content": "<|file_sep|>",
|
176 |
+
"lstrip": false,
|
177 |
+
"normalized": false,
|
178 |
+
"rstrip": false,
|
179 |
+
"single_word": false,
|
180 |
+
"special": false
|
181 |
+
}
|
182 |
+
},
|
183 |
+
"bos_token": "<|begin▁of▁sentence|>",
|
184 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
185 |
+
"clean_up_tokenization_spaces": false,
|
186 |
+
"eos_token": "<|end▁of▁sentence|>",
|
187 |
+
"extra_special_tokens": {},
|
188 |
+
"legacy": true,
|
189 |
+
"model_max_length": 16384,
|
190 |
+
"pad_token": "<unk>",
|
191 |
+
"sp_model_kwargs": {},
|
192 |
+
"tokenizer_class": "LlamaTokenizerFast",
|
193 |
+
"unk_token": null,
|
194 |
+
"use_default_system_prompt": false,
|
195 |
+
"_commit_hash": null
|
196 |
+
}
|