WangXFng committed on
Commit
e102c6d
·
verified ·
1 Parent(s): 18cf564

Model save

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "o_proj",
 
24
  "down_proj",
25
  "gate_proj",
26
- "q_proj",
27
- "k_proj",
28
- "v_proj",
29
- "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "up_proj",
24
+ "k_proj",
25
  "o_proj",
26
+ "q_proj",
27
  "down_proj",
28
  "gate_proj",
29
+ "v_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ee301ae8658ad54a8214ec130a4adf65df37c06bfee5f5d0cc110bf91d4ff62
3
  size 1684597880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b52762cf7801185937e75435d400062f4235e285022b9fe2401a88b49afe3d
3
  size 1684597880
trainer_state.json CHANGED
@@ -10,236 +10,236 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.12135922330097088,
13
- "grad_norm": 1.0340704917907715,
14
  "learning_rate": 9.698956057295461e-05,
15
- "loss": 1.2072,
16
  "step": 250
17
  },
18
  {
19
  "epoch": 0.24271844660194175,
20
- "grad_norm": 1.0259687900543213,
21
  "learning_rate": 9.395484340859432e-05,
22
- "loss": 0.6533,
23
  "step": 500
24
  },
25
  {
26
  "epoch": 0.3640776699029126,
27
- "grad_norm": 0.8549349308013916,
28
  "learning_rate": 9.092012624423404e-05,
29
- "loss": 0.5518,
30
  "step": 750
31
  },
32
  {
33
  "epoch": 0.4854368932038835,
34
- "grad_norm": 0.7689054608345032,
35
  "learning_rate": 8.788540907987377e-05,
36
- "loss": 0.5109,
37
  "step": 1000
38
  },
39
  {
40
  "epoch": 0.6067961165048543,
41
- "grad_norm": 0.6630316972732544,
42
  "learning_rate": 8.485069191551348e-05,
43
- "loss": 0.4973,
44
  "step": 1250
45
  },
46
  {
47
  "epoch": 0.7281553398058253,
48
- "grad_norm": 0.6936432719230652,
49
  "learning_rate": 8.181597475115321e-05,
50
- "loss": 0.4815,
51
  "step": 1500
52
  },
53
  {
54
  "epoch": 0.8495145631067961,
55
- "grad_norm": 0.6800591945648193,
56
  "learning_rate": 7.878125758679291e-05,
57
- "loss": 0.4741,
58
  "step": 1750
59
  },
60
  {
61
  "epoch": 0.970873786407767,
62
- "grad_norm": 0.6063706278800964,
63
  "learning_rate": 7.574654042243264e-05,
64
- "loss": 0.4672,
65
  "step": 2000
66
  },
67
  {
68
  "epoch": 1.0922330097087378,
69
- "grad_norm": 0.6320546865463257,
70
  "learning_rate": 7.271182325807235e-05,
71
- "loss": 0.4585,
72
  "step": 2250
73
  },
74
  {
75
  "epoch": 1.2135922330097086,
76
- "grad_norm": 0.6298216581344604,
77
  "learning_rate": 6.967710609371208e-05,
78
- "loss": 0.4474,
79
  "step": 2500
80
  },
81
  {
82
  "epoch": 1.3349514563106797,
83
- "grad_norm": 0.6066320538520813,
84
  "learning_rate": 6.664238892935178e-05,
85
- "loss": 0.4425,
86
  "step": 2750
87
  },
88
  {
89
  "epoch": 1.4563106796116505,
90
- "grad_norm": 0.7594243884086609,
91
  "learning_rate": 6.360767176499151e-05,
92
- "loss": 0.4289,
93
  "step": 3000
94
  },
95
  {
96
  "epoch": 1.5776699029126213,
97
- "grad_norm": 0.8379995822906494,
98
  "learning_rate": 6.0572954600631224e-05,
99
- "loss": 0.4119,
100
  "step": 3250
101
  },
102
  {
103
  "epoch": 1.6990291262135924,
104
- "grad_norm": 0.8765040040016174,
105
  "learning_rate": 5.7538237436270945e-05,
106
- "loss": 0.3954,
107
  "step": 3500
108
  },
109
  {
110
  "epoch": 1.820388349514563,
111
- "grad_norm": 0.8040120005607605,
112
  "learning_rate": 5.450352027191066e-05,
113
- "loss": 0.3772,
114
  "step": 3750
115
  },
116
  {
117
  "epoch": 1.941747572815534,
118
- "grad_norm": 0.8949540257453918,
119
  "learning_rate": 5.146880310755038e-05,
120
- "loss": 0.3596,
121
  "step": 4000
122
  },
123
  {
124
  "epoch": 2.063106796116505,
125
- "grad_norm": 0.986718475818634,
126
  "learning_rate": 4.84340859431901e-05,
127
- "loss": 0.332,
128
  "step": 4250
129
  },
130
  {
131
  "epoch": 2.1844660194174756,
132
- "grad_norm": 0.9565535187721252,
133
  "learning_rate": 4.539936877882982e-05,
134
- "loss": 0.313,
135
  "step": 4500
136
  },
137
  {
138
  "epoch": 2.3058252427184467,
139
- "grad_norm": 0.9416393041610718,
140
  "learning_rate": 4.236465161446954e-05,
141
- "loss": 0.3034,
142
  "step": 4750
143
  },
144
  {
145
  "epoch": 2.4271844660194173,
146
- "grad_norm": 1.002323865890503,
147
  "learning_rate": 3.932993445010925e-05,
148
- "loss": 0.2928,
149
  "step": 5000
150
  },
151
  {
152
  "epoch": 2.5485436893203883,
153
- "grad_norm": 0.9281540513038635,
154
  "learning_rate": 3.6295217285748975e-05,
155
- "loss": 0.2829,
156
  "step": 5250
157
  },
158
  {
159
  "epoch": 2.6699029126213594,
160
- "grad_norm": 1.0740528106689453,
161
  "learning_rate": 3.326050012138869e-05,
162
- "loss": 0.2732,
163
  "step": 5500
164
  },
165
  {
166
  "epoch": 2.79126213592233,
167
- "grad_norm": 0.952369749546051,
168
  "learning_rate": 3.022578295702841e-05,
169
- "loss": 0.2692,
170
  "step": 5750
171
  },
172
  {
173
  "epoch": 2.912621359223301,
174
- "grad_norm": 0.9148930907249451,
175
  "learning_rate": 2.7191065792668125e-05,
176
- "loss": 0.261,
177
  "step": 6000
178
  },
179
  {
180
  "epoch": 3.033980582524272,
181
- "grad_norm": 0.9394662380218506,
182
  "learning_rate": 2.4156348628307843e-05,
183
- "loss": 0.2489,
184
  "step": 6250
185
  },
186
  {
187
  "epoch": 3.1553398058252426,
188
- "grad_norm": 0.9207432866096497,
189
  "learning_rate": 2.112163146394756e-05,
190
- "loss": 0.2351,
191
  "step": 6500
192
  },
193
  {
194
  "epoch": 3.2766990291262137,
195
- "grad_norm": 0.9576259255409241,
196
  "learning_rate": 1.808691429958728e-05,
197
- "loss": 0.2315,
198
  "step": 6750
199
  },
200
  {
201
  "epoch": 3.3980582524271843,
202
- "grad_norm": 1.0424150228500366,
203
  "learning_rate": 1.5052197135226997e-05,
204
- "loss": 0.2287,
205
  "step": 7000
206
  },
207
  {
208
  "epoch": 3.5194174757281553,
209
- "grad_norm": 0.9650384783744812,
210
  "learning_rate": 1.2017479970866715e-05,
211
- "loss": 0.2267,
212
  "step": 7250
213
  },
214
  {
215
  "epoch": 3.6407766990291264,
216
- "grad_norm": 0.939440131187439,
217
  "learning_rate": 8.982762806506435e-06,
218
- "loss": 0.2245,
219
  "step": 7500
220
  },
221
  {
222
  "epoch": 3.762135922330097,
223
- "grad_norm": 0.9506264328956604,
224
  "learning_rate": 5.948045642146152e-06,
225
- "loss": 0.221,
226
  "step": 7750
227
  },
228
  {
229
  "epoch": 3.883495145631068,
230
- "grad_norm": 0.9145563244819641,
231
  "learning_rate": 2.9133284777858704e-06,
232
- "loss": 0.2214,
233
  "step": 8000
234
  },
235
  {
236
  "epoch": 4.0,
237
  "step": 8240,
238
  "total_flos": 1.1079720316327956e+18,
239
- "train_loss": 0.38045854198122486,
240
- "train_runtime": 14779.6974,
241
- "train_samples_per_second": 35.681,
242
- "train_steps_per_second": 0.558
243
  }
244
  ],
245
  "logging_steps": 250,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.12135922330097088,
13
+ "grad_norm": 0.9599943161010742,
14
  "learning_rate": 9.698956057295461e-05,
15
+ "loss": 1.2196,
16
  "step": 250
17
  },
18
  {
19
  "epoch": 0.24271844660194175,
20
+ "grad_norm": 1.0285232067108154,
21
  "learning_rate": 9.395484340859432e-05,
22
+ "loss": 0.6688,
23
  "step": 500
24
  },
25
  {
26
  "epoch": 0.3640776699029126,
27
+ "grad_norm": 0.8658091425895691,
28
  "learning_rate": 9.092012624423404e-05,
29
+ "loss": 0.5603,
30
  "step": 750
31
  },
32
  {
33
  "epoch": 0.4854368932038835,
34
+ "grad_norm": 0.8770154714584351,
35
  "learning_rate": 8.788540907987377e-05,
36
+ "loss": 0.5165,
37
  "step": 1000
38
  },
39
  {
40
  "epoch": 0.6067961165048543,
41
+ "grad_norm": 0.6812583208084106,
42
  "learning_rate": 8.485069191551348e-05,
43
+ "loss": 0.5015,
44
  "step": 1250
45
  },
46
  {
47
  "epoch": 0.7281553398058253,
48
+ "grad_norm": 0.6911689043045044,
49
  "learning_rate": 8.181597475115321e-05,
50
+ "loss": 0.4846,
51
  "step": 1500
52
  },
53
  {
54
  "epoch": 0.8495145631067961,
55
+ "grad_norm": 0.6556753516197205,
56
  "learning_rate": 7.878125758679291e-05,
57
+ "loss": 0.4769,
58
  "step": 1750
59
  },
60
  {
61
  "epoch": 0.970873786407767,
62
+ "grad_norm": 0.5876182317733765,
63
  "learning_rate": 7.574654042243264e-05,
64
+ "loss": 0.4701,
65
  "step": 2000
66
  },
67
  {
68
  "epoch": 1.0922330097087378,
69
+ "grad_norm": 0.6175569891929626,
70
  "learning_rate": 7.271182325807235e-05,
71
+ "loss": 0.4616,
72
  "step": 2250
73
  },
74
  {
75
  "epoch": 1.2135922330097086,
76
+ "grad_norm": 0.6353004574775696,
77
  "learning_rate": 6.967710609371208e-05,
78
+ "loss": 0.4518,
79
  "step": 2500
80
  },
81
  {
82
  "epoch": 1.3349514563106797,
83
+ "grad_norm": 0.5879459977149963,
84
  "learning_rate": 6.664238892935178e-05,
85
+ "loss": 0.4483,
86
  "step": 2750
87
  },
88
  {
89
  "epoch": 1.4563106796116505,
90
+ "grad_norm": 0.6575189232826233,
91
  "learning_rate": 6.360767176499151e-05,
92
+ "loss": 0.4367,
93
  "step": 3000
94
  },
95
  {
96
  "epoch": 1.5776699029126213,
97
+ "grad_norm": 0.724533200263977,
98
  "learning_rate": 6.0572954600631224e-05,
99
+ "loss": 0.4226,
100
  "step": 3250
101
  },
102
  {
103
  "epoch": 1.6990291262135924,
104
+ "grad_norm": 0.7686433792114258,
105
  "learning_rate": 5.7538237436270945e-05,
106
+ "loss": 0.4104,
107
  "step": 3500
108
  },
109
  {
110
  "epoch": 1.820388349514563,
111
+ "grad_norm": 0.7101556658744812,
112
  "learning_rate": 5.450352027191066e-05,
113
+ "loss": 0.3954,
114
  "step": 3750
115
  },
116
  {
117
  "epoch": 1.941747572815534,
118
+ "grad_norm": 0.7856088280677795,
119
  "learning_rate": 5.146880310755038e-05,
120
+ "loss": 0.3827,
121
  "step": 4000
122
  },
123
  {
124
  "epoch": 2.063106796116505,
125
+ "grad_norm": 0.8785816431045532,
126
  "learning_rate": 4.84340859431901e-05,
127
+ "loss": 0.3585,
128
  "step": 4250
129
  },
130
  {
131
  "epoch": 2.1844660194174756,
132
+ "grad_norm": 0.858726441860199,
133
  "learning_rate": 4.539936877882982e-05,
134
+ "loss": 0.341,
135
  "step": 4500
136
  },
137
  {
138
  "epoch": 2.3058252427184467,
139
+ "grad_norm": 0.8789017200469971,
140
  "learning_rate": 4.236465161446954e-05,
141
+ "loss": 0.3313,
142
  "step": 4750
143
  },
144
  {
145
  "epoch": 2.4271844660194173,
146
+ "grad_norm": 0.9984813928604126,
147
  "learning_rate": 3.932993445010925e-05,
148
+ "loss": 0.321,
149
  "step": 5000
150
  },
151
  {
152
  "epoch": 2.5485436893203883,
153
+ "grad_norm": 0.8649771213531494,
154
  "learning_rate": 3.6295217285748975e-05,
155
+ "loss": 0.3104,
156
  "step": 5250
157
  },
158
  {
159
  "epoch": 2.6699029126213594,
160
+ "grad_norm": 0.9905620217323303,
161
  "learning_rate": 3.326050012138869e-05,
162
+ "loss": 0.3008,
163
  "step": 5500
164
  },
165
  {
166
  "epoch": 2.79126213592233,
167
+ "grad_norm": 0.9460727572441101,
168
  "learning_rate": 3.022578295702841e-05,
169
+ "loss": 0.2965,
170
  "step": 5750
171
  },
172
  {
173
  "epoch": 2.912621359223301,
174
+ "grad_norm": 0.8885589241981506,
175
  "learning_rate": 2.7191065792668125e-05,
176
+ "loss": 0.2876,
177
  "step": 6000
178
  },
179
  {
180
  "epoch": 3.033980582524272,
181
+ "grad_norm": 0.9261214733123779,
182
  "learning_rate": 2.4156348628307843e-05,
183
+ "loss": 0.2759,
184
  "step": 6250
185
  },
186
  {
187
  "epoch": 3.1553398058252426,
188
+ "grad_norm": 0.9241772294044495,
189
  "learning_rate": 2.112163146394756e-05,
190
+ "loss": 0.2618,
191
  "step": 6500
192
  },
193
  {
194
  "epoch": 3.2766990291262137,
195
+ "grad_norm": 0.929602861404419,
196
  "learning_rate": 1.808691429958728e-05,
197
+ "loss": 0.2578,
198
  "step": 6750
199
  },
200
  {
201
  "epoch": 3.3980582524271843,
202
+ "grad_norm": 0.9885833263397217,
203
  "learning_rate": 1.5052197135226997e-05,
204
+ "loss": 0.2547,
205
  "step": 7000
206
  },
207
  {
208
  "epoch": 3.5194174757281553,
209
+ "grad_norm": 0.9474493861198425,
210
  "learning_rate": 1.2017479970866715e-05,
211
+ "loss": 0.2528,
212
  "step": 7250
213
  },
214
  {
215
  "epoch": 3.6407766990291264,
216
+ "grad_norm": 0.9105657935142517,
217
  "learning_rate": 8.982762806506435e-06,
218
+ "loss": 0.25,
219
  "step": 7500
220
  },
221
  {
222
  "epoch": 3.762135922330097,
223
+ "grad_norm": 0.9185407161712646,
224
  "learning_rate": 5.948045642146152e-06,
225
+ "loss": 0.2463,
226
  "step": 7750
227
  },
228
  {
229
  "epoch": 3.883495145631068,
230
+ "grad_norm": 0.8763870000839233,
231
  "learning_rate": 2.9133284777858704e-06,
232
+ "loss": 0.2462,
233
  "step": 8000
234
  },
235
  {
236
  "epoch": 4.0,
237
  "step": 8240,
238
  "total_flos": 1.1079720316327956e+18,
239
+ "train_loss": 0.39851856185394585,
240
+ "train_runtime": 97554.2615,
241
+ "train_samples_per_second": 5.406,
242
+ "train_steps_per_second": 0.084
243
  }
244
  ],
245
  "logging_steps": 250,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e2e5010aee0656a196ff6fb1e40552ae9c562641a4e0fd86668ac7f370da2f9
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b207d7b4b95c725e5af7402457b9eb0332219dc968567f69b710daa980d1135f
3
  size 5240