tiennguyenbnbk commited on
Commit
28d11c8
·
verified ·
1 Parent(s): 137e791

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -5
  2. all_results.json +14 -14
  3. test_results.json +9 -9
  4. train_results.json +6 -6
  5. trainer_state.json +505 -467
README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [vinai/phobert-base-v2](https://huggingface.co/vinai/phobert-base-v2) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.4661
22
- - Accuracy: 0.9399
23
- - F1 Score: 0.9222
24
- - Recall: 0.9304
25
- - Precision: 0.9146
26
 
27
  ## Model description
28
 
 
18
 
19
  This model is a fine-tuned version of [vinai/phobert-base-v2](https://huggingface.co/vinai/phobert-base-v2) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4632
22
+ - Accuracy: 0.9408
23
+ - F1 Score: 0.9253
24
+ - Recall: 0.9343
25
+ - Precision: 0.9169
26
 
27
  ## Model description
28
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 32.900432900432904,
3
- "eval_accuracy": 0.9312686769899484,
4
- "eval_f1_score": 0.9167547336443177,
5
- "eval_loss": 0.6494598388671875,
6
- "eval_precision": 0.9175186124029386,
7
- "eval_recall": 0.916118129711967,
8
- "eval_runtime": 6.63,
9
- "eval_samples_per_second": 555.203,
10
- "eval_steps_per_second": 8.748,
11
- "total_flos": 8228992941651000.0,
12
- "train_loss": 0.6119064652292352,
13
- "train_runtime": 3347.1951,
14
- "train_samples_per_second": 152.964,
15
- "train_steps_per_second": 1.195
16
  }
 
1
  {
2
+ "epoch": 34.78260869565217,
3
+ "eval_accuracy": 0.9407608695652174,
4
+ "eval_f1_score": 0.9252844986926706,
5
+ "eval_loss": 0.4632340669631958,
6
+ "eval_precision": 0.9169145670543192,
7
+ "eval_recall": 0.9342956374641919,
8
+ "eval_runtime": 6.9014,
9
+ "eval_samples_per_second": 533.224,
10
+ "eval_steps_per_second": 8.404,
11
+ "total_flos": 8542898522220600.0,
12
+ "train_loss": 0.44249137926101684,
13
+ "train_runtime": 3485.9803,
14
+ "train_samples_per_second": 146.874,
15
+ "train_steps_per_second": 1.147
16
  }
test_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 32.900432900432904,
3
- "eval_accuracy": 0.9312686769899484,
4
- "eval_f1_score": 0.9167547336443177,
5
- "eval_loss": 0.6494598388671875,
6
- "eval_precision": 0.9175186124029386,
7
- "eval_recall": 0.916118129711967,
8
- "eval_runtime": 6.63,
9
- "eval_samples_per_second": 555.203,
10
- "eval_steps_per_second": 8.748
11
  }
 
1
  {
2
+ "epoch": 34.78260869565217,
3
+ "eval_accuracy": 0.9407608695652174,
4
+ "eval_f1_score": 0.9252844986926706,
5
+ "eval_loss": 0.4632340669631958,
6
+ "eval_precision": 0.9169145670543192,
7
+ "eval_recall": 0.9342956374641919,
8
+ "eval_runtime": 6.9014,
9
+ "eval_samples_per_second": 533.224,
10
+ "eval_steps_per_second": 8.404
11
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 32.900432900432904,
3
- "total_flos": 8228992941651000.0,
4
- "train_loss": 0.6119064652292352,
5
- "train_runtime": 3347.1951,
6
- "train_samples_per_second": 152.964,
7
- "train_steps_per_second": 1.195
8
  }
 
1
  {
2
+ "epoch": 34.78260869565217,
3
+ "total_flos": 8542898522220600.0,
4
+ "train_loss": 0.44249137926101684,
5
+ "train_runtime": 3485.9803,
6
+ "train_samples_per_second": 146.874,
7
+ "train_steps_per_second": 1.147
8
  }
trainer_state.json CHANGED
@@ -1,743 +1,781 @@
1
  {
2
- "best_metric": 0.9167547336443177,
3
- "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2/checkpoint-2800",
4
- "epoch": 32.900432900432904,
5
  "eval_steps": 100,
6
- "global_step": 3800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.8658008658008658,
13
- "grad_norm": 1.4310975074768066,
14
  "learning_rate": 2.5e-06,
15
- "loss": 1.8746,
16
  "step": 100
17
  },
18
  {
19
- "epoch": 0.8658008658008658,
20
- "eval_accuracy": 0.4004346644933442,
21
- "eval_f1_score": 0.08169599556602468,
22
- "eval_loss": 1.7251219749450684,
23
- "eval_precision": 0.057204952070477745,
24
  "eval_recall": 0.14285714285714285,
25
- "eval_runtime": 6.1571,
26
- "eval_samples_per_second": 597.843,
27
- "eval_steps_per_second": 9.42,
28
  "step": 100
29
  },
30
  {
31
- "epoch": 1.7316017316017316,
32
- "grad_norm": 2.5783257484436035,
33
  "learning_rate": 5e-06,
34
- "loss": 1.591,
35
  "step": 200
36
  },
37
  {
38
- "epoch": 1.7316017316017316,
39
- "eval_accuracy": 0.6454767726161369,
40
- "eval_f1_score": 0.28627620756927996,
41
- "eval_loss": 1.3510475158691406,
42
- "eval_precision": 0.2774898302724703,
43
- "eval_recall": 0.3126508980240324,
44
- "eval_runtime": 6.5489,
45
- "eval_samples_per_second": 562.075,
46
- "eval_steps_per_second": 8.856,
47
  "step": 200
48
  },
49
  {
50
- "epoch": 2.5974025974025974,
51
- "grad_norm": 3.353203058242798,
52
  "learning_rate": 7.500000000000001e-06,
53
- "loss": 1.2461,
54
  "step": 300
55
  },
56
  {
57
- "epoch": 2.5974025974025974,
58
- "eval_accuracy": 0.7813094267861994,
59
- "eval_f1_score": 0.5632519066393918,
60
- "eval_loss": 1.0290194749832153,
61
- "eval_precision": 0.5618434026987791,
62
- "eval_recall": 0.5669755803157759,
63
- "eval_runtime": 6.4743,
64
- "eval_samples_per_second": 568.559,
65
- "eval_steps_per_second": 8.959,
66
  "step": 300
67
  },
68
  {
69
- "epoch": 3.463203463203463,
70
- "grad_norm": 4.798435688018799,
71
  "learning_rate": 1e-05,
72
- "loss": 0.9936,
73
  "step": 400
74
  },
75
  {
76
- "epoch": 3.463203463203463,
77
- "eval_accuracy": 0.8280358598207009,
78
- "eval_f1_score": 0.6070778914740699,
79
- "eval_loss": 0.8636265397071838,
80
- "eval_precision": 0.7310493883538899,
81
- "eval_recall": 0.6261307402558046,
82
- "eval_runtime": 6.467,
83
- "eval_samples_per_second": 569.194,
84
- "eval_steps_per_second": 8.969,
85
  "step": 400
86
  },
87
  {
88
- "epoch": 4.329004329004329,
89
- "grad_norm": 3.696899175643921,
90
  "learning_rate": 9.722222222222223e-06,
91
- "loss": 0.846,
92
  "step": 500
93
  },
94
  {
95
- "epoch": 4.329004329004329,
96
- "eval_accuracy": 0.8606356968215159,
97
- "eval_f1_score": 0.6995601762325577,
98
- "eval_loss": 0.7818133234977722,
99
- "eval_precision": 0.7378679571747613,
100
- "eval_recall": 0.7000527411412641,
101
- "eval_runtime": 6.4735,
102
- "eval_samples_per_second": 568.624,
103
- "eval_steps_per_second": 8.96,
104
  "step": 500
105
  },
106
  {
107
- "epoch": 5.194805194805195,
108
- "grad_norm": 3.5592989921569824,
109
  "learning_rate": 9.444444444444445e-06,
110
- "loss": 0.7528,
111
  "step": 600
112
  },
113
  {
114
- "epoch": 5.194805194805195,
115
- "eval_accuracy": 0.8845422439554469,
116
- "eval_f1_score": 0.7880662770504314,
117
- "eval_loss": 0.7183576226234436,
118
- "eval_precision": 0.885479708387116,
119
- "eval_recall": 0.7747483039105137,
120
- "eval_runtime": 6.4771,
121
- "eval_samples_per_second": 568.311,
122
- "eval_steps_per_second": 8.955,
123
  "step": 600
124
  },
125
  {
126
- "epoch": 6.0606060606060606,
127
- "grad_norm": 6.572245121002197,
128
  "learning_rate": 9.166666666666666e-06,
129
- "loss": 0.6829,
130
  "step": 700
131
  },
132
  {
133
- "epoch": 6.0606060606060606,
134
- "eval_accuracy": 0.9065471339309971,
135
- "eval_f1_score": 0.8721427577623805,
136
- "eval_loss": 0.6787899136543274,
137
- "eval_precision": 0.8877491000810773,
138
- "eval_recall": 0.8621653634986893,
139
- "eval_runtime": 6.5011,
140
- "eval_samples_per_second": 566.21,
141
- "eval_steps_per_second": 8.922,
142
  "step": 700
143
  },
144
  {
145
- "epoch": 6.926406926406926,
146
- "grad_norm": 6.824036121368408,
147
  "learning_rate": 8.888888888888888e-06,
148
- "loss": 0.6318,
149
  "step": 800
150
  },
151
  {
152
- "epoch": 6.926406926406926,
153
- "eval_accuracy": 0.9060038033143167,
154
- "eval_f1_score": 0.8771840566593205,
155
- "eval_loss": 0.6685603857040405,
156
- "eval_precision": 0.8735081717688525,
157
- "eval_recall": 0.8843353255680675,
158
- "eval_runtime": 6.4595,
159
- "eval_samples_per_second": 569.856,
160
- "eval_steps_per_second": 8.979,
161
  "step": 800
162
  },
163
  {
164
- "epoch": 7.792207792207792,
165
- "grad_norm": 6.760078430175781,
166
  "learning_rate": 8.611111111111112e-06,
167
- "loss": 0.5946,
168
  "step": 900
169
  },
170
  {
171
- "epoch": 7.792207792207792,
172
- "eval_accuracy": 0.9054604726976365,
173
- "eval_f1_score": 0.883019750226862,
174
- "eval_loss": 0.6709757447242737,
175
- "eval_precision": 0.8941393272626895,
176
- "eval_recall": 0.8778851118086725,
177
- "eval_runtime": 6.449,
178
- "eval_samples_per_second": 570.787,
179
- "eval_steps_per_second": 8.994,
180
  "step": 900
181
  },
182
  {
183
- "epoch": 8.658008658008658,
184
- "grad_norm": 3.9726505279541016,
185
  "learning_rate": 8.333333333333334e-06,
186
- "loss": 0.5787,
187
  "step": 1000
188
  },
189
  {
190
- "epoch": 8.658008658008658,
191
- "eval_accuracy": 0.9231187177397446,
192
- "eval_f1_score": 0.9046349674986128,
193
- "eval_loss": 0.6429829597473145,
194
- "eval_precision": 0.9136176302840415,
195
- "eval_recall": 0.8968630181032563,
196
- "eval_runtime": 6.5577,
197
- "eval_samples_per_second": 561.323,
198
- "eval_steps_per_second": 8.845,
199
  "step": 1000
200
  },
201
  {
202
- "epoch": 9.523809523809524,
203
- "grad_norm": 3.6529064178466797,
204
  "learning_rate": 8.055555555555557e-06,
205
- "loss": 0.5465,
206
  "step": 1100
207
  },
208
  {
209
- "epoch": 9.523809523809524,
210
- "eval_accuracy": 0.9233903830480847,
211
- "eval_f1_score": 0.9029393609974734,
212
- "eval_loss": 0.6390886306762695,
213
- "eval_precision": 0.907538747359747,
214
- "eval_recall": 0.8996010541333774,
215
- "eval_runtime": 6.5373,
216
- "eval_samples_per_second": 563.078,
217
- "eval_steps_per_second": 8.872,
218
  "step": 1100
219
  },
220
  {
221
- "epoch": 10.38961038961039,
222
- "grad_norm": 3.7719361782073975,
223
  "learning_rate": 7.77777777777778e-06,
224
- "loss": 0.5351,
225
  "step": 1200
226
  },
227
  {
228
- "epoch": 10.38961038961039,
229
- "eval_accuracy": 0.9163270850312415,
230
- "eval_f1_score": 0.9009696239334336,
231
- "eval_loss": 0.6590227484703064,
232
- "eval_precision": 0.9032173500628025,
233
- "eval_recall": 0.9028948724760895,
234
- "eval_runtime": 6.4561,
235
- "eval_samples_per_second": 570.16,
236
- "eval_steps_per_second": 8.984,
237
  "step": 1200
238
  },
239
  {
240
- "epoch": 11.255411255411255,
241
- "grad_norm": 5.107598304748535,
242
  "learning_rate": 7.500000000000001e-06,
243
- "loss": 0.5253,
244
  "step": 1300
245
  },
246
  {
247
- "epoch": 11.255411255411255,
248
- "eval_accuracy": 0.9171420809562619,
249
- "eval_f1_score": 0.8992270643983542,
250
- "eval_loss": 0.6565839648246765,
251
- "eval_precision": 0.9002180733865013,
252
- "eval_recall": 0.9017115405053154,
253
- "eval_runtime": 6.4934,
254
- "eval_samples_per_second": 566.885,
255
- "eval_steps_per_second": 8.932,
256
  "step": 1300
257
  },
258
  {
259
- "epoch": 12.121212121212121,
260
- "grad_norm": 1.914890170097351,
261
  "learning_rate": 7.222222222222223e-06,
262
- "loss": 0.5129,
263
  "step": 1400
264
  },
265
  {
266
- "epoch": 12.121212121212121,
267
- "eval_accuracy": 0.9214887258897039,
268
- "eval_f1_score": 0.8995353215155666,
269
- "eval_loss": 0.6489275097846985,
270
- "eval_precision": 0.8852784844592642,
271
- "eval_recall": 0.9156833236681746,
272
- "eval_runtime": 6.4876,
273
- "eval_samples_per_second": 567.393,
274
- "eval_steps_per_second": 8.94,
275
  "step": 1400
276
  },
277
  {
278
- "epoch": 12.987012987012987,
279
- "grad_norm": 4.1629638671875,
280
  "learning_rate": 6.944444444444445e-06,
281
- "loss": 0.507,
282
  "step": 1500
283
  },
284
  {
285
- "epoch": 12.987012987012987,
286
- "eval_accuracy": 0.9187720728063027,
287
- "eval_f1_score": 0.895959427062996,
288
- "eval_loss": 0.6600282192230225,
289
- "eval_precision": 0.8850994841694488,
290
- "eval_recall": 0.9084482921055681,
291
- "eval_runtime": 6.4633,
292
- "eval_samples_per_second": 569.52,
293
- "eval_steps_per_second": 8.974,
294
  "step": 1500
295
  },
296
  {
297
- "epoch": 13.852813852813853,
298
- "grad_norm": 4.176562786102295,
299
  "learning_rate": 6.666666666666667e-06,
300
- "loss": 0.498,
301
  "step": 1600
302
  },
303
  {
304
- "epoch": 13.852813852813853,
305
- "eval_accuracy": 0.926107036131486,
306
- "eval_f1_score": 0.9037600142315002,
307
- "eval_loss": 0.6435835957527161,
308
- "eval_precision": 0.910555642535469,
309
- "eval_recall": 0.8986425893474609,
310
- "eval_runtime": 6.5159,
311
- "eval_samples_per_second": 564.923,
312
- "eval_steps_per_second": 8.901,
313
  "step": 1600
314
  },
315
  {
316
- "epoch": 14.718614718614718,
317
- "grad_norm": 5.224420547485352,
318
  "learning_rate": 6.3888888888888885e-06,
319
- "loss": 0.4928,
320
  "step": 1700
321
  },
322
  {
323
- "epoch": 14.718614718614718,
324
- "eval_accuracy": 0.928280358598207,
325
- "eval_f1_score": 0.9090054325068483,
326
- "eval_loss": 0.6421458125114441,
327
- "eval_precision": 0.9050797676652048,
328
- "eval_recall": 0.9139087669260643,
329
- "eval_runtime": 6.5241,
330
- "eval_samples_per_second": 564.214,
331
- "eval_steps_per_second": 8.89,
332
  "step": 1700
333
  },
334
  {
335
- "epoch": 15.584415584415584,
336
- "grad_norm": 2.502448081970215,
337
  "learning_rate": 6.111111111111112e-06,
338
- "loss": 0.488,
339
  "step": 1800
340
  },
341
  {
342
- "epoch": 15.584415584415584,
343
- "eval_accuracy": 0.9242053789731052,
344
- "eval_f1_score": 0.9069760243433789,
345
- "eval_loss": 0.6526629328727722,
346
- "eval_precision": 0.8984062209172313,
347
- "eval_recall": 0.9170624424146544,
348
- "eval_runtime": 6.5117,
349
- "eval_samples_per_second": 565.294,
350
- "eval_steps_per_second": 8.907,
351
  "step": 1800
352
  },
353
  {
354
- "epoch": 16.450216450216452,
355
- "grad_norm": 5.522485256195068,
356
  "learning_rate": 5.833333333333334e-06,
357
- "loss": 0.4855,
358
  "step": 1900
359
  },
360
  {
361
- "epoch": 16.450216450216452,
362
- "eval_accuracy": 0.9288236892148872,
363
- "eval_f1_score": 0.9103550521971505,
364
- "eval_loss": 0.6503170728683472,
365
- "eval_precision": 0.910387283571616,
366
- "eval_recall": 0.9118040778151767,
367
- "eval_runtime": 6.4567,
368
- "eval_samples_per_second": 570.109,
369
- "eval_steps_per_second": 8.983,
370
  "step": 1900
371
  },
372
  {
373
- "epoch": 17.316017316017316,
374
- "grad_norm": 1.1161519289016724,
375
  "learning_rate": 5.555555555555557e-06,
376
- "loss": 0.4827,
377
  "step": 2000
378
  },
379
  {
380
- "epoch": 17.316017316017316,
381
- "eval_accuracy": 0.9263787014398261,
382
- "eval_f1_score": 0.9082943419356964,
383
- "eval_loss": 0.6522781252861023,
384
- "eval_precision": 0.9101325540545261,
385
- "eval_recall": 0.9076634535056071,
386
- "eval_runtime": 6.493,
387
- "eval_samples_per_second": 566.921,
388
- "eval_steps_per_second": 8.933,
389
  "step": 2000
390
  },
391
  {
392
- "epoch": 18.181818181818183,
393
- "grad_norm": 4.889358997344971,
394
  "learning_rate": 5.2777777777777785e-06,
395
- "loss": 0.4806,
396
  "step": 2100
397
  },
398
  {
399
- "epoch": 18.181818181818183,
400
- "eval_accuracy": 0.9225753871230644,
401
- "eval_f1_score": 0.9070433565843993,
402
- "eval_loss": 0.6726859211921692,
403
- "eval_precision": 0.9069272643426315,
404
- "eval_recall": 0.9107096842915502,
405
- "eval_runtime": 6.4653,
406
- "eval_samples_per_second": 569.347,
407
- "eval_steps_per_second": 8.971,
408
  "step": 2100
409
  },
410
  {
411
- "epoch": 19.047619047619047,
412
- "grad_norm": 6.80066442489624,
413
  "learning_rate": 5e-06,
414
- "loss": 0.475,
415
  "step": 2200
416
  },
417
  {
418
- "epoch": 19.047619047619047,
419
- "eval_accuracy": 0.9187720728063027,
420
- "eval_f1_score": 0.8987625395496276,
421
- "eval_loss": 0.6789355278015137,
422
- "eval_precision": 0.8924568384004198,
423
- "eval_recall": 0.9075582377741745,
424
- "eval_runtime": 6.491,
425
- "eval_samples_per_second": 567.09,
426
- "eval_steps_per_second": 8.935,
427
  "step": 2200
428
  },
429
  {
430
- "epoch": 19.913419913419915,
431
- "grad_norm": 3.8123650550842285,
432
  "learning_rate": 4.722222222222222e-06,
433
- "loss": 0.4769,
434
  "step": 2300
435
  },
436
  {
437
- "epoch": 19.913419913419915,
438
- "eval_accuracy": 0.923933713664765,
439
- "eval_f1_score": 0.9047374447084814,
440
- "eval_loss": 0.6616267561912537,
441
- "eval_precision": 0.8994764750545533,
442
- "eval_recall": 0.9115051728602043,
443
- "eval_runtime": 6.4602,
444
- "eval_samples_per_second": 569.799,
445
- "eval_steps_per_second": 8.978,
446
  "step": 2300
447
  },
448
  {
449
- "epoch": 20.77922077922078,
450
- "grad_norm": 1.6108217239379883,
451
  "learning_rate": 4.444444444444444e-06,
452
- "loss": 0.4717,
453
  "step": 2400
454
  },
455
  {
456
- "epoch": 20.77922077922078,
457
- "eval_accuracy": 0.9266503667481663,
458
- "eval_f1_score": 0.9044367225449236,
459
- "eval_loss": 0.6538987159729004,
460
- "eval_precision": 0.904842224712077,
461
- "eval_recall": 0.9051169350428675,
462
- "eval_runtime": 6.4574,
463
- "eval_samples_per_second": 570.043,
464
- "eval_steps_per_second": 8.982,
465
  "step": 2400
466
  },
467
  {
468
- "epoch": 21.645021645021647,
469
- "grad_norm": 6.592935562133789,
470
  "learning_rate": 4.166666666666667e-06,
471
- "loss": 0.4714,
472
  "step": 2500
473
  },
474
  {
475
- "epoch": 21.645021645021647,
476
- "eval_accuracy": 0.9285520239065471,
477
- "eval_f1_score": 0.9087470694900519,
478
- "eval_loss": 0.6579604148864746,
479
- "eval_precision": 0.8978341455168631,
480
- "eval_recall": 0.9209883408258847,
481
- "eval_runtime": 6.5174,
482
- "eval_samples_per_second": 564.798,
483
- "eval_steps_per_second": 8.899,
484
  "step": 2500
485
  },
486
  {
487
- "epoch": 22.51082251082251,
488
- "grad_norm": 0.9003086090087891,
489
  "learning_rate": 3.88888888888889e-06,
490
- "loss": 0.4674,
491
  "step": 2600
492
  },
493
  {
494
- "epoch": 22.51082251082251,
495
- "eval_accuracy": 0.9280086932898669,
496
- "eval_f1_score": 0.9102326432475396,
497
- "eval_loss": 0.653831958770752,
498
- "eval_precision": 0.9087887802985566,
499
- "eval_recall": 0.9121197185082034,
500
- "eval_runtime": 6.5375,
501
- "eval_samples_per_second": 563.063,
502
- "eval_steps_per_second": 8.872,
503
  "step": 2600
504
  },
505
  {
506
- "epoch": 23.376623376623378,
507
- "grad_norm": 1.9655799865722656,
508
  "learning_rate": 3.6111111111111115e-06,
509
- "loss": 0.4647,
510
  "step": 2700
511
  },
512
  {
513
- "epoch": 23.376623376623378,
514
- "eval_accuracy": 0.9236620483564248,
515
- "eval_f1_score": 0.9094309387183797,
516
- "eval_loss": 0.6711372137069702,
517
- "eval_precision": 0.9020675048042391,
518
- "eval_recall": 0.918588197629796,
519
- "eval_runtime": 6.5134,
520
- "eval_samples_per_second": 565.141,
521
- "eval_steps_per_second": 8.905,
522
  "step": 2700
523
  },
524
  {
525
- "epoch": 24.242424242424242,
526
- "grad_norm": 0.5603305697441101,
527
  "learning_rate": 3.3333333333333333e-06,
528
- "loss": 0.4641,
529
  "step": 2800
530
  },
531
  {
532
- "epoch": 24.242424242424242,
533
- "eval_accuracy": 0.9312686769899484,
534
- "eval_f1_score": 0.9167547336443177,
535
- "eval_loss": 0.6494598388671875,
536
- "eval_precision": 0.9175186124029386,
537
- "eval_recall": 0.916118129711967,
538
- "eval_runtime": 6.4912,
539
- "eval_samples_per_second": 567.075,
540
- "eval_steps_per_second": 8.935,
541
  "step": 2800
542
  },
543
  {
544
- "epoch": 25.10822510822511,
545
- "grad_norm": 1.2995818853378296,
546
  "learning_rate": 3.055555555555556e-06,
547
- "loss": 0.4661,
548
  "step": 2900
549
  },
550
  {
551
- "epoch": 25.10822510822511,
552
- "eval_accuracy": 0.9290953545232273,
553
- "eval_f1_score": 0.9106664499520731,
554
- "eval_loss": 0.652369499206543,
555
- "eval_precision": 0.9096933035627524,
556
- "eval_recall": 0.9120934117697305,
557
- "eval_runtime": 6.4925,
558
- "eval_samples_per_second": 566.963,
559
- "eval_steps_per_second": 8.933,
560
  "step": 2900
561
  },
562
  {
563
- "epoch": 25.974025974025974,
564
- "grad_norm": 10.505922317504883,
565
  "learning_rate": 2.7777777777777783e-06,
566
- "loss": 0.4642,
567
  "step": 3000
568
  },
569
  {
570
- "epoch": 25.974025974025974,
571
- "eval_accuracy": 0.9271936973648465,
572
- "eval_f1_score": 0.9085564052221086,
573
- "eval_loss": 0.6616336107254028,
574
- "eval_precision": 0.9023635662998748,
575
- "eval_recall": 0.9157819747531982,
576
- "eval_runtime": 6.4399,
577
- "eval_samples_per_second": 571.589,
578
- "eval_steps_per_second": 9.006,
579
  "step": 3000
580
  },
581
  {
582
- "epoch": 26.83982683982684,
583
- "grad_norm": 4.1924147605896,
584
  "learning_rate": 2.5e-06,
585
- "loss": 0.4634,
586
  "step": 3100
587
  },
588
  {
589
- "epoch": 26.83982683982684,
590
- "eval_accuracy": 0.9271936973648465,
591
- "eval_f1_score": 0.9100073358373931,
592
- "eval_loss": 0.6607591509819031,
593
- "eval_precision": 0.9048187850917044,
594
- "eval_recall": 0.9157366847293898,
595
- "eval_runtime": 6.5377,
596
- "eval_samples_per_second": 563.04,
597
- "eval_steps_per_second": 8.872,
598
  "step": 3100
599
  },
600
  {
601
- "epoch": 27.705627705627705,
602
- "grad_norm": 7.721441268920898,
603
  "learning_rate": 2.222222222222222e-06,
604
- "loss": 0.4621,
605
  "step": 3200
606
  },
607
  {
608
- "epoch": 27.705627705627705,
609
- "eval_accuracy": 0.9301820157565879,
610
- "eval_f1_score": 0.9127105272463062,
611
- "eval_loss": 0.6603537797927856,
612
- "eval_precision": 0.9082151611261499,
613
- "eval_recall": 0.9180282237862402,
614
- "eval_runtime": 6.4947,
615
- "eval_samples_per_second": 566.767,
616
- "eval_steps_per_second": 8.93,
617
  "step": 3200
618
  },
619
  {
620
- "epoch": 28.571428571428573,
621
- "grad_norm": 1.0750694274902344,
622
  "learning_rate": 1.944444444444445e-06,
623
- "loss": 0.4607,
624
  "step": 3300
625
  },
626
  {
627
- "epoch": 28.571428571428573,
628
- "eval_accuracy": 0.9274653626731867,
629
- "eval_f1_score": 0.9094908279169074,
630
- "eval_loss": 0.667899489402771,
631
- "eval_precision": 0.9042256864356671,
632
- "eval_recall": 0.9157890000544793,
633
- "eval_runtime": 6.482,
634
- "eval_samples_per_second": 567.883,
635
- "eval_steps_per_second": 8.948,
636
  "step": 3300
637
  },
638
  {
639
- "epoch": 29.437229437229437,
640
- "grad_norm": 2.3283915519714355,
641
  "learning_rate": 1.6666666666666667e-06,
642
- "loss": 0.4605,
643
  "step": 3400
644
  },
645
  {
646
- "epoch": 29.437229437229437,
647
- "eval_accuracy": 0.9293670198315676,
648
- "eval_f1_score": 0.9127762875853428,
649
- "eval_loss": 0.6584461331367493,
650
- "eval_precision": 0.9121226216148021,
651
- "eval_recall": 0.9142481162001624,
652
- "eval_runtime": 6.4486,
653
- "eval_samples_per_second": 570.824,
654
- "eval_steps_per_second": 8.994,
655
  "step": 3400
656
  },
657
  {
658
- "epoch": 30.303030303030305,
659
- "grad_norm": 4.329758644104004,
660
  "learning_rate": 1.3888888888888892e-06,
661
- "loss": 0.4594,
662
  "step": 3500
663
  },
664
  {
665
- "epoch": 30.303030303030305,
666
- "eval_accuracy": 0.9274653626731867,
667
- "eval_f1_score": 0.9102966977526281,
668
- "eval_loss": 0.6636437773704529,
669
- "eval_precision": 0.9077902692921593,
670
- "eval_recall": 0.9134549746607524,
671
- "eval_runtime": 6.4772,
672
- "eval_samples_per_second": 568.299,
673
- "eval_steps_per_second": 8.954,
674
  "step": 3500
675
  },
676
  {
677
- "epoch": 31.16883116883117,
678
- "grad_norm": 3.934568166732788,
679
  "learning_rate": 1.111111111111111e-06,
680
- "loss": 0.4603,
681
  "step": 3600
682
  },
683
  {
684
- "epoch": 31.16883116883117,
685
- "eval_accuracy": 0.928280358598207,
686
- "eval_f1_score": 0.9114072690530889,
687
- "eval_loss": 0.6612041592597961,
688
- "eval_precision": 0.9094899873929566,
689
- "eval_recall": 0.914101214818058,
690
- "eval_runtime": 6.4876,
691
- "eval_samples_per_second": 567.392,
692
- "eval_steps_per_second": 8.94,
693
  "step": 3600
694
  },
695
  {
696
- "epoch": 32.03463203463203,
697
- "grad_norm": 0.5543671250343323,
698
  "learning_rate": 8.333333333333333e-07,
699
- "loss": 0.4589,
700
  "step": 3700
701
  },
702
  {
703
- "epoch": 32.03463203463203,
704
- "eval_accuracy": 0.928280358598207,
705
- "eval_f1_score": 0.9107083833184353,
706
- "eval_loss": 0.6628167629241943,
707
- "eval_precision": 0.9029268059220896,
708
- "eval_recall": 0.9195858845392287,
709
- "eval_runtime": 6.5055,
710
- "eval_samples_per_second": 565.83,
711
- "eval_steps_per_second": 8.916,
712
  "step": 3700
713
  },
714
  {
715
- "epoch": 32.900432900432904,
716
- "grad_norm": 0.0647527351975441,
717
  "learning_rate": 5.555555555555555e-07,
718
- "loss": 0.4594,
719
  "step": 3800
720
  },
721
  {
722
- "epoch": 32.900432900432904,
723
- "eval_accuracy": 0.9293670198315676,
724
- "eval_f1_score": 0.9120990650711008,
725
- "eval_loss": 0.6589987277984619,
726
- "eval_precision": 0.9101638270119666,
727
- "eval_recall": 0.9147659563783218,
728
- "eval_runtime": 6.5732,
729
- "eval_samples_per_second": 560.0,
730
- "eval_steps_per_second": 8.824,
731
  "step": 3800
732
  },
733
  {
734
- "epoch": 32.900432900432904,
735
- "step": 3800,
736
- "total_flos": 8228992941651000.0,
737
- "train_loss": 0.6119064652292352,
738
- "train_runtime": 3347.1951,
739
- "train_samples_per_second": 152.964,
740
- "train_steps_per_second": 1.195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
  }
742
  ],
743
  "logging_steps": 100,
@@ -745,7 +783,7 @@
745
  "num_input_tokens_seen": 0,
746
  "num_train_epochs": 35,
747
  "save_steps": 100,
748
- "total_flos": 8228992941651000.0,
749
  "train_batch_size": 64,
750
  "trial_name": null,
751
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9252844986926706,
3
+ "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2/checkpoint-3100",
4
+ "epoch": 34.78260869565217,
5
  "eval_steps": 100,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.8695652173913043,
13
+ "grad_norm": 1.2893089056015015,
14
  "learning_rate": 2.5e-06,
15
+ "loss": 1.854,
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.8695652173913043,
20
+ "eval_accuracy": 0.39945652173913043,
21
+ "eval_f1_score": 0.08155339805825243,
22
+ "eval_loss": 1.6864124536514282,
23
+ "eval_precision": 0.057065217391304345,
24
  "eval_recall": 0.14285714285714285,
25
+ "eval_runtime": 6.5919,
26
+ "eval_samples_per_second": 558.265,
27
+ "eval_steps_per_second": 8.799,
28
  "step": 100
29
  },
30
  {
31
+ "epoch": 1.7391304347826086,
32
+ "grad_norm": 2.0746519565582275,
33
  "learning_rate": 5e-06,
34
+ "loss": 1.5604,
35
  "step": 200
36
  },
37
  {
38
+ "epoch": 1.7391304347826086,
39
+ "eval_accuracy": 0.6105978260869566,
40
+ "eval_f1_score": 0.25042278548863744,
41
+ "eval_loss": 1.309004306793213,
42
+ "eval_precision": 0.26708593024189137,
43
+ "eval_recall": 0.28186218782707295,
44
+ "eval_runtime": 7.0459,
45
+ "eval_samples_per_second": 522.292,
46
+ "eval_steps_per_second": 8.232,
47
  "step": 200
48
  },
49
  {
50
+ "epoch": 2.608695652173913,
51
+ "grad_norm": 4.827264785766602,
52
  "learning_rate": 7.500000000000001e-06,
53
+ "loss": 1.1923,
54
  "step": 300
55
  },
56
  {
57
+ "epoch": 2.608695652173913,
58
+ "eval_accuracy": 0.7864130434782609,
59
+ "eval_f1_score": 0.5714767047654633,
60
+ "eval_loss": 0.9517147541046143,
61
+ "eval_precision": 0.5673620359300847,
62
+ "eval_recall": 0.5777445829004313,
63
+ "eval_runtime": 6.8687,
64
+ "eval_samples_per_second": 535.761,
65
+ "eval_steps_per_second": 8.444,
66
  "step": 300
67
  },
68
  {
69
+ "epoch": 3.4782608695652173,
70
+ "grad_norm": 4.206140041351318,
71
  "learning_rate": 1e-05,
72
+ "loss": 0.9214,
73
  "step": 400
74
  },
75
  {
76
+ "epoch": 3.4782608695652173,
77
+ "eval_accuracy": 0.8372282608695653,
78
+ "eval_f1_score": 0.612409568465097,
79
+ "eval_loss": 0.7528494000434875,
80
+ "eval_precision": 0.5976542381939991,
81
+ "eval_recall": 0.6291693604106213,
82
+ "eval_runtime": 6.9093,
83
+ "eval_samples_per_second": 532.619,
84
+ "eval_steps_per_second": 8.395,
85
  "step": 400
86
  },
87
  {
88
+ "epoch": 4.3478260869565215,
89
+ "grad_norm": 6.886340618133545,
90
  "learning_rate": 9.722222222222223e-06,
91
+ "loss": 0.758,
92
  "step": 500
93
  },
94
  {
95
+ "epoch": 4.3478260869565215,
96
+ "eval_accuracy": 0.8668478260869565,
97
+ "eval_f1_score": 0.6659473695330357,
98
+ "eval_loss": 0.632614016532898,
99
+ "eval_precision": 0.7570108636149265,
100
+ "eval_recall": 0.6622064392166015,
101
+ "eval_runtime": 6.8829,
102
+ "eval_samples_per_second": 534.659,
103
+ "eval_steps_per_second": 8.427,
104
  "step": 500
105
  },
106
  {
107
+ "epoch": 5.217391304347826,
108
+ "grad_norm": 4.610263347625732,
109
  "learning_rate": 9.444444444444445e-06,
110
+ "loss": 0.6389,
111
  "step": 600
112
  },
113
  {
114
+ "epoch": 5.217391304347826,
115
+ "eval_accuracy": 0.8913043478260869,
116
+ "eval_f1_score": 0.7803940110099798,
117
+ "eval_loss": 0.5609962940216064,
118
+ "eval_precision": 0.8994163127752409,
119
+ "eval_recall": 0.7578106168705178,
120
+ "eval_runtime": 6.9524,
121
+ "eval_samples_per_second": 529.313,
122
+ "eval_steps_per_second": 8.342,
123
  "step": 600
124
  },
125
  {
126
+ "epoch": 6.086956521739131,
127
+ "grad_norm": 6.5632171630859375,
128
  "learning_rate": 9.166666666666666e-06,
129
+ "loss": 0.5577,
130
  "step": 700
131
  },
132
  {
133
+ "epoch": 6.086956521739131,
134
+ "eval_accuracy": 0.9097826086956522,
135
+ "eval_f1_score": 0.8772239546861178,
136
+ "eval_loss": 0.518897533416748,
137
+ "eval_precision": 0.8848760614607966,
138
+ "eval_recall": 0.8751998390346817,
139
+ "eval_runtime": 6.9449,
140
+ "eval_samples_per_second": 529.887,
141
+ "eval_steps_per_second": 8.351,
142
  "step": 700
143
  },
144
  {
145
+ "epoch": 6.956521739130435,
146
+ "grad_norm": 5.276834487915039,
147
  "learning_rate": 8.888888888888888e-06,
148
+ "loss": 0.4924,
149
  "step": 800
150
  },
151
  {
152
+ "epoch": 6.956521739130435,
153
+ "eval_accuracy": 0.9157608695652174,
154
+ "eval_f1_score": 0.8882818429280892,
155
+ "eval_loss": 0.4865158498287201,
156
+ "eval_precision": 0.8951806726540267,
157
+ "eval_recall": 0.8830561339196462,
158
+ "eval_runtime": 6.9591,
159
+ "eval_samples_per_second": 528.805,
160
+ "eval_steps_per_second": 8.334,
161
  "step": 800
162
  },
163
  {
164
+ "epoch": 7.826086956521739,
165
+ "grad_norm": 5.703993320465088,
166
  "learning_rate": 8.611111111111112e-06,
167
+ "loss": 0.4466,
168
  "step": 900
169
  },
170
  {
171
+ "epoch": 7.826086956521739,
172
+ "eval_accuracy": 0.9233695652173913,
173
+ "eval_f1_score": 0.901100351532045,
174
+ "eval_loss": 0.4718396067619324,
175
+ "eval_precision": 0.9035627892941535,
176
+ "eval_recall": 0.8999623519233246,
177
+ "eval_runtime": 6.8799,
178
+ "eval_samples_per_second": 534.892,
179
+ "eval_steps_per_second": 8.43,
180
  "step": 900
181
  },
182
  {
183
+ "epoch": 8.695652173913043,
184
+ "grad_norm": 4.013239860534668,
185
  "learning_rate": 8.333333333333334e-06,
186
+ "loss": 0.4074,
187
  "step": 1000
188
  },
189
  {
190
+ "epoch": 8.695652173913043,
191
+ "eval_accuracy": 0.9241847826086956,
192
+ "eval_f1_score": 0.9036524938306566,
193
+ "eval_loss": 0.46144258975982666,
194
+ "eval_precision": 0.9074895946702142,
195
+ "eval_recall": 0.9022880305381644,
196
+ "eval_runtime": 6.8809,
197
+ "eval_samples_per_second": 534.81,
198
+ "eval_steps_per_second": 8.429,
199
  "step": 1000
200
  },
201
  {
202
+ "epoch": 9.565217391304348,
203
+ "grad_norm": 5.082529067993164,
204
  "learning_rate": 8.055555555555557e-06,
205
+ "loss": 0.3986,
206
  "step": 1100
207
  },
208
  {
209
+ "epoch": 9.565217391304348,
210
+ "eval_accuracy": 0.9236413043478261,
211
+ "eval_f1_score": 0.9049050960720594,
212
+ "eval_loss": 0.4672768712043762,
213
+ "eval_precision": 0.898138404827953,
214
+ "eval_recall": 0.9136899119280064,
215
+ "eval_runtime": 6.9211,
216
+ "eval_samples_per_second": 531.711,
217
+ "eval_steps_per_second": 8.38,
218
  "step": 1100
219
  },
220
  {
221
+ "epoch": 10.434782608695652,
222
+ "grad_norm": 7.0753984451293945,
223
  "learning_rate": 7.77777777777778e-06,
224
+ "loss": 0.3673,
225
  "step": 1200
226
  },
227
  {
228
+ "epoch": 10.434782608695652,
229
+ "eval_accuracy": 0.9307065217391305,
230
+ "eval_f1_score": 0.9133756100183478,
231
+ "eval_loss": 0.45040827989578247,
232
+ "eval_precision": 0.9213133583473806,
233
+ "eval_recall": 0.9059425908375184,
234
+ "eval_runtime": 6.9128,
235
+ "eval_samples_per_second": 532.346,
236
+ "eval_steps_per_second": 8.39,
237
  "step": 1200
238
  },
239
  {
240
+ "epoch": 11.304347826086957,
241
+ "grad_norm": 3.027184247970581,
242
  "learning_rate": 7.500000000000001e-06,
243
+ "loss": 0.3579,
244
  "step": 1300
245
  },
246
  {
247
+ "epoch": 11.304347826086957,
248
+ "eval_accuracy": 0.9315217391304348,
249
+ "eval_f1_score": 0.9144976601484841,
250
+ "eval_loss": 0.447768896818161,
251
+ "eval_precision": 0.9123115041297183,
252
+ "eval_recall": 0.9179810133876928,
253
+ "eval_runtime": 6.8813,
254
+ "eval_samples_per_second": 534.78,
255
+ "eval_steps_per_second": 8.429,
256
  "step": 1300
257
  },
258
  {
259
+ "epoch": 12.173913043478262,
260
+ "grad_norm": 6.544119834899902,
261
  "learning_rate": 7.222222222222223e-06,
262
+ "loss": 0.3408,
263
  "step": 1400
264
  },
265
  {
266
+ "epoch": 12.173913043478262,
267
+ "eval_accuracy": 0.9315217391304348,
268
+ "eval_f1_score": 0.9125839928840032,
269
+ "eval_loss": 0.44630134105682373,
270
+ "eval_precision": 0.9160798318219715,
271
+ "eval_recall": 0.9101140451493557,
272
+ "eval_runtime": 6.8705,
273
+ "eval_samples_per_second": 535.626,
274
+ "eval_steps_per_second": 8.442,
275
  "step": 1400
276
  },
277
  {
278
+ "epoch": 13.043478260869565,
279
+ "grad_norm": 1.1837869882583618,
280
  "learning_rate": 6.944444444444445e-06,
281
+ "loss": 0.3316,
282
  "step": 1500
283
  },
284
  {
285
+ "epoch": 13.043478260869565,
286
+ "eval_accuracy": 0.9304347826086956,
287
+ "eval_f1_score": 0.9114310919979555,
288
+ "eval_loss": 0.4618222713470459,
289
+ "eval_precision": 0.901534612430018,
290
+ "eval_recall": 0.9233297132434791,
291
+ "eval_runtime": 6.8807,
292
+ "eval_samples_per_second": 534.826,
293
+ "eval_steps_per_second": 8.429,
294
  "step": 1500
295
  },
296
  {
297
+ "epoch": 13.91304347826087,
298
+ "grad_norm": 4.6852030754089355,
299
  "learning_rate": 6.666666666666667e-06,
300
+ "loss": 0.321,
301
  "step": 1600
302
  },
303
  {
304
+ "epoch": 13.91304347826087,
305
+ "eval_accuracy": 0.936141304347826,
306
+ "eval_f1_score": 0.9177928085220054,
307
+ "eval_loss": 0.4429613947868347,
308
+ "eval_precision": 0.9162566862756574,
309
+ "eval_recall": 0.9203372711492189,
310
+ "eval_runtime": 6.8962,
311
+ "eval_samples_per_second": 533.625,
312
+ "eval_steps_per_second": 8.41,
313
  "step": 1600
314
  },
315
  {
316
+ "epoch": 14.782608695652174,
317
+ "grad_norm": 2.1539549827575684,
318
  "learning_rate": 6.3888888888888885e-06,
319
+ "loss": 0.3113,
320
  "step": 1700
321
  },
322
  {
323
+ "epoch": 14.782608695652174,
324
+ "eval_accuracy": 0.9394021739130435,
325
+ "eval_f1_score": 0.9206336007621322,
326
+ "eval_loss": 0.4418139159679413,
327
+ "eval_precision": 0.9180154745670104,
328
+ "eval_recall": 0.9233295456017971,
329
+ "eval_runtime": 6.9165,
330
+ "eval_samples_per_second": 532.064,
331
+ "eval_steps_per_second": 8.386,
332
  "step": 1700
333
  },
334
  {
335
+ "epoch": 15.652173913043478,
336
+ "grad_norm": 2.5465943813323975,
337
  "learning_rate": 6.111111111111112e-06,
338
+ "loss": 0.3085,
339
  "step": 1800
340
  },
341
  {
342
+ "epoch": 15.652173913043478,
343
+ "eval_accuracy": 0.9391304347826087,
344
+ "eval_f1_score": 0.9213651720221497,
345
+ "eval_loss": 0.4470200836658478,
346
+ "eval_precision": 0.9206502407175643,
347
+ "eval_recall": 0.922553969144098,
348
+ "eval_runtime": 6.9262,
349
+ "eval_samples_per_second": 531.315,
350
+ "eval_steps_per_second": 8.374,
351
  "step": 1800
352
  },
353
  {
354
+ "epoch": 16.52173913043478,
355
+ "grad_norm": 8.969688415527344,
356
  "learning_rate": 5.833333333333334e-06,
357
+ "loss": 0.304,
358
  "step": 1900
359
  },
360
  {
361
+ "epoch": 16.52173913043478,
362
+ "eval_accuracy": 0.9369565217391305,
363
+ "eval_f1_score": 0.9171053942846182,
364
+ "eval_loss": 0.45003601908683777,
365
+ "eval_precision": 0.9217182621997301,
366
+ "eval_recall": 0.91362601629383,
367
+ "eval_runtime": 6.9417,
368
+ "eval_samples_per_second": 530.127,
369
+ "eval_steps_per_second": 8.355,
370
  "step": 1900
371
  },
372
  {
373
+ "epoch": 17.391304347826086,
374
+ "grad_norm": 3.3478808403015137,
375
  "learning_rate": 5.555555555555557e-06,
376
+ "loss": 0.2967,
377
  "step": 2000
378
  },
379
  {
380
+ "epoch": 17.391304347826086,
381
+ "eval_accuracy": 0.9345108695652173,
382
+ "eval_f1_score": 0.914860205961535,
383
+ "eval_loss": 0.4604756832122803,
384
+ "eval_precision": 0.9174677942056463,
385
+ "eval_recall": 0.9135354812885799,
386
+ "eval_runtime": 6.887,
387
+ "eval_samples_per_second": 534.339,
388
+ "eval_steps_per_second": 8.422,
389
  "step": 2000
390
  },
391
  {
392
+ "epoch": 18.26086956521739,
393
+ "grad_norm": 3.5150227546691895,
394
  "learning_rate": 5.2777777777777785e-06,
395
+ "loss": 0.2956,
396
  "step": 2100
397
  },
398
  {
399
+ "epoch": 18.26086956521739,
400
+ "eval_accuracy": 0.9347826086956522,
401
+ "eval_f1_score": 0.9144840902438658,
402
+ "eval_loss": 0.4595490097999573,
403
+ "eval_precision": 0.9061017033815302,
404
+ "eval_recall": 0.9237607012127841,
405
+ "eval_runtime": 6.9021,
406
+ "eval_samples_per_second": 533.174,
407
+ "eval_steps_per_second": 8.403,
408
  "step": 2100
409
  },
410
  {
411
+ "epoch": 19.130434782608695,
412
+ "grad_norm": 4.217600345611572,
413
  "learning_rate": 5e-06,
414
+ "loss": 0.2874,
415
  "step": 2200
416
  },
417
  {
418
+ "epoch": 19.130434782608695,
419
+ "eval_accuracy": 0.9377717391304348,
420
+ "eval_f1_score": 0.9185420530822453,
421
+ "eval_loss": 0.46195611357688904,
422
+ "eval_precision": 0.9178792739937973,
423
+ "eval_recall": 0.9200392265430211,
424
+ "eval_runtime": 6.9164,
425
+ "eval_samples_per_second": 532.068,
426
+ "eval_steps_per_second": 8.386,
427
  "step": 2200
428
  },
429
  {
430
+ "epoch": 20.0,
431
+ "grad_norm": 0.156525120139122,
432
  "learning_rate": 4.722222222222222e-06,
433
+ "loss": 0.2891,
434
  "step": 2300
435
  },
436
  {
437
+ "epoch": 20.0,
438
+ "eval_accuracy": 0.936141304347826,
439
+ "eval_f1_score": 0.9167307484171513,
440
+ "eval_loss": 0.46016111969947815,
441
+ "eval_precision": 0.9166336381524655,
442
+ "eval_recall": 0.9182092584681624,
443
+ "eval_runtime": 6.8971,
444
+ "eval_samples_per_second": 533.56,
445
+ "eval_steps_per_second": 8.409,
446
  "step": 2300
447
  },
448
  {
449
+ "epoch": 20.869565217391305,
450
+ "grad_norm": 4.048396587371826,
451
  "learning_rate": 4.444444444444444e-06,
452
+ "loss": 0.2862,
453
  "step": 2400
454
  },
455
  {
456
+ "epoch": 20.869565217391305,
457
+ "eval_accuracy": 0.933695652173913,
458
+ "eval_f1_score": 0.9149466518195882,
459
+ "eval_loss": 0.4600348472595215,
460
+ "eval_precision": 0.9133193957379392,
461
+ "eval_recall": 0.9168769591028961,
462
+ "eval_runtime": 6.9003,
463
+ "eval_samples_per_second": 533.307,
464
+ "eval_steps_per_second": 8.405,
465
  "step": 2400
466
  },
467
  {
468
+ "epoch": 21.73913043478261,
469
+ "grad_norm": 5.180253505706787,
470
  "learning_rate": 4.166666666666667e-06,
471
+ "loss": 0.2851,
472
  "step": 2500
473
  },
474
  {
475
+ "epoch": 21.73913043478261,
476
+ "eval_accuracy": 0.9372282608695652,
477
+ "eval_f1_score": 0.9184411791946457,
478
+ "eval_loss": 0.45560210943222046,
479
+ "eval_precision": 0.9094890172985339,
480
+ "eval_recall": 0.928221748402003,
481
+ "eval_runtime": 6.865,
482
+ "eval_samples_per_second": 536.051,
483
+ "eval_steps_per_second": 8.449,
484
  "step": 2500
485
  },
486
  {
487
+ "epoch": 22.608695652173914,
488
+ "grad_norm": 8.518896102905273,
489
  "learning_rate": 3.88888888888889e-06,
490
+ "loss": 0.2798,
491
  "step": 2600
492
  },
493
  {
494
+ "epoch": 22.608695652173914,
495
+ "eval_accuracy": 0.9404891304347827,
496
+ "eval_f1_score": 0.9222790200318222,
497
+ "eval_loss": 0.45864006876945496,
498
+ "eval_precision": 0.9155654158942171,
499
+ "eval_recall": 0.9296298254069438,
500
+ "eval_runtime": 6.9078,
501
+ "eval_samples_per_second": 532.73,
502
+ "eval_steps_per_second": 8.396,
503
  "step": 2600
504
  },
505
  {
506
+ "epoch": 23.47826086956522,
507
+ "grad_norm": 0.2805568277835846,
508
  "learning_rate": 3.6111111111111115e-06,
509
+ "loss": 0.2787,
510
  "step": 2700
511
  },
512
  {
513
+ "epoch": 23.47826086956522,
514
+ "eval_accuracy": 0.9407608695652174,
515
+ "eval_f1_score": 0.9250258258102347,
516
+ "eval_loss": 0.4546903967857361,
517
+ "eval_precision": 0.9222845189859414,
518
+ "eval_recall": 0.9280424440067916,
519
+ "eval_runtime": 6.9071,
520
+ "eval_samples_per_second": 532.784,
521
+ "eval_steps_per_second": 8.397,
522
  "step": 2700
523
  },
524
  {
525
+ "epoch": 24.347826086956523,
526
+ "grad_norm": 0.33725014328956604,
527
  "learning_rate": 3.3333333333333333e-06,
528
+ "loss": 0.2806,
529
  "step": 2800
530
  },
531
  {
532
+ "epoch": 24.347826086956523,
533
+ "eval_accuracy": 0.9380434782608695,
534
+ "eval_f1_score": 0.9187707000303608,
535
+ "eval_loss": 0.45898741483688354,
536
+ "eval_precision": 0.9123560681843038,
537
+ "eval_recall": 0.925853281950203,
538
+ "eval_runtime": 6.9172,
539
+ "eval_samples_per_second": 532.007,
540
+ "eval_steps_per_second": 8.385,
541
  "step": 2800
542
  },
543
  {
544
+ "epoch": 25.217391304347824,
545
+ "grad_norm": 4.991839408874512,
546
  "learning_rate": 3.055555555555556e-06,
547
+ "loss": 0.2768,
548
  "step": 2900
549
  },
550
  {
551
+ "epoch": 25.217391304347824,
552
+ "eval_accuracy": 0.936141304347826,
553
+ "eval_f1_score": 0.9188439090576789,
554
+ "eval_loss": 0.4617587625980377,
555
+ "eval_precision": 0.9203622150474884,
556
+ "eval_recall": 0.9179598800095373,
557
+ "eval_runtime": 6.9343,
558
+ "eval_samples_per_second": 530.696,
559
+ "eval_steps_per_second": 8.364,
560
  "step": 2900
561
  },
562
  {
563
+ "epoch": 26.08695652173913,
564
+ "grad_norm": 6.376648902893066,
565
  "learning_rate": 2.7777777777777783e-06,
566
+ "loss": 0.2773,
567
  "step": 3000
568
  },
569
  {
570
+ "epoch": 26.08695652173913,
571
+ "eval_accuracy": 0.9380434782608695,
572
+ "eval_f1_score": 0.9202952986499114,
573
+ "eval_loss": 0.4578970968723297,
574
+ "eval_precision": 0.9176566359948707,
575
+ "eval_recall": 0.9230698673860079,
576
+ "eval_runtime": 6.9545,
577
+ "eval_samples_per_second": 529.152,
578
+ "eval_steps_per_second": 8.34,
579
  "step": 3000
580
  },
581
  {
582
+ "epoch": 26.956521739130434,
583
+ "grad_norm": 8.162334442138672,
584
  "learning_rate": 2.5e-06,
585
+ "loss": 0.2724,
586
  "step": 3100
587
  },
588
  {
589
+ "epoch": 26.956521739130434,
590
+ "eval_accuracy": 0.9407608695652174,
591
+ "eval_f1_score": 0.9252844986926706,
592
+ "eval_loss": 0.4632340669631958,
593
+ "eval_precision": 0.9169145670543192,
594
+ "eval_recall": 0.9342956374641919,
595
+ "eval_runtime": 6.9177,
596
+ "eval_samples_per_second": 531.966,
597
+ "eval_steps_per_second": 8.384,
598
  "step": 3100
599
  },
600
  {
601
+ "epoch": 27.82608695652174,
602
+ "grad_norm": 5.353995323181152,
603
  "learning_rate": 2.222222222222222e-06,
604
+ "loss": 0.2716,
605
  "step": 3200
606
  },
607
  {
608
+ "epoch": 27.82608695652174,
609
+ "eval_accuracy": 0.9364130434782608,
610
+ "eval_f1_score": 0.9204656187460882,
611
+ "eval_loss": 0.4744097590446472,
612
+ "eval_precision": 0.908834486221008,
613
+ "eval_recall": 0.9337614921699828,
614
+ "eval_runtime": 6.9561,
615
+ "eval_samples_per_second": 529.035,
616
+ "eval_steps_per_second": 8.338,
617
  "step": 3200
618
  },
619
  {
620
+ "epoch": 28.695652173913043,
621
+ "grad_norm": 4.451539993286133,
622
  "learning_rate": 1.944444444444445e-06,
623
+ "loss": 0.2705,
624
  "step": 3300
625
  },
626
  {
627
+ "epoch": 28.695652173913043,
628
+ "eval_accuracy": 0.9402173913043478,
629
+ "eval_f1_score": 0.9218337386274096,
630
+ "eval_loss": 0.46000754833221436,
631
+ "eval_precision": 0.9158771078401343,
632
+ "eval_recall": 0.9282202251039701,
633
+ "eval_runtime": 6.9029,
634
+ "eval_samples_per_second": 533.111,
635
+ "eval_steps_per_second": 8.402,
636
  "step": 3300
637
  },
638
  {
639
+ "epoch": 29.565217391304348,
640
+ "grad_norm": 1.1730854511260986,
641
  "learning_rate": 1.6666666666666667e-06,
642
+ "loss": 0.2682,
643
  "step": 3400
644
  },
645
  {
646
+ "epoch": 29.565217391304348,
647
+ "eval_accuracy": 0.9380434782608695,
648
+ "eval_f1_score": 0.9195673704557201,
649
+ "eval_loss": 0.4688616096973419,
650
+ "eval_precision": 0.9141786844421071,
651
+ "eval_recall": 0.9255712172781052,
652
+ "eval_runtime": 6.9457,
653
+ "eval_samples_per_second": 529.826,
654
+ "eval_steps_per_second": 8.351,
655
  "step": 3400
656
  },
657
  {
658
+ "epoch": 30.434782608695652,
659
+ "grad_norm": 0.5718241333961487,
660
  "learning_rate": 1.3888888888888892e-06,
661
+ "loss": 0.2718,
662
  "step": 3500
663
  },
664
  {
665
+ "epoch": 30.434782608695652,
666
+ "eval_accuracy": 0.941304347826087,
667
+ "eval_f1_score": 0.9226474713954919,
668
+ "eval_loss": 0.4682305157184601,
669
+ "eval_precision": 0.917275778746201,
670
+ "eval_recall": 0.928756452719473,
671
+ "eval_runtime": 6.9559,
672
+ "eval_samples_per_second": 529.044,
673
+ "eval_steps_per_second": 8.338,
674
  "step": 3500
675
  },
676
  {
677
+ "epoch": 31.304347826086957,
678
+ "grad_norm": 7.950275897979736,
679
  "learning_rate": 1.111111111111111e-06,
680
+ "loss": 0.2694,
681
  "step": 3600
682
  },
683
  {
684
+ "epoch": 31.304347826086957,
685
+ "eval_accuracy": 0.9385869565217392,
686
+ "eval_f1_score": 0.9200793063280935,
687
+ "eval_loss": 0.4660183787345886,
688
+ "eval_precision": 0.911949308994198,
689
+ "eval_recall": 0.9289019445962466,
690
+ "eval_runtime": 6.9636,
691
+ "eval_samples_per_second": 528.461,
692
+ "eval_steps_per_second": 8.329,
693
  "step": 3600
694
  },
695
  {
696
+ "epoch": 32.17391304347826,
697
+ "grad_norm": 7.988296985626221,
698
  "learning_rate": 8.333333333333333e-07,
699
+ "loss": 0.2678,
700
  "step": 3700
701
  },
702
  {
703
+ "epoch": 32.17391304347826,
704
+ "eval_accuracy": 0.9404891304347827,
705
+ "eval_f1_score": 0.9216222067451383,
706
+ "eval_loss": 0.4612596035003662,
707
+ "eval_precision": 0.9194630478500835,
708
+ "eval_recall": 0.9239463357180048,
709
+ "eval_runtime": 6.9594,
710
+ "eval_samples_per_second": 528.781,
711
+ "eval_steps_per_second": 8.334,
712
  "step": 3700
713
  },
714
  {
715
+ "epoch": 33.04347826086956,
716
+ "grad_norm": 1.6498167514801025,
717
  "learning_rate": 5.555555555555555e-07,
718
+ "loss": 0.2679,
719
  "step": 3800
720
  },
721
  {
722
+ "epoch": 33.04347826086956,
723
+ "eval_accuracy": 0.9407608695652174,
724
+ "eval_f1_score": 0.9224216505483482,
725
+ "eval_loss": 0.46310955286026,
726
+ "eval_precision": 0.9170998731211755,
727
+ "eval_recall": 0.928001378287892,
728
+ "eval_runtime": 6.9582,
729
+ "eval_samples_per_second": 528.873,
730
+ "eval_steps_per_second": 8.335,
731
  "step": 3800
732
  },
733
  {
734
+ "epoch": 33.91304347826087,
735
+ "grad_norm": 3.3423407077789307,
736
+ "learning_rate": 2.7777777777777776e-07,
737
+ "loss": 0.2681,
738
+ "step": 3900
739
+ },
740
+ {
741
+ "epoch": 33.91304347826087,
742
+ "eval_accuracy": 0.9402173913043478,
743
+ "eval_f1_score": 0.92234899080498,
744
+ "eval_loss": 0.4643385410308838,
745
+ "eval_precision": 0.9151804174115907,
746
+ "eval_recall": 0.9299220134801852,
747
+ "eval_runtime": 7.0049,
748
+ "eval_samples_per_second": 525.349,
749
+ "eval_steps_per_second": 8.28,
750
+ "step": 3900
751
+ },
752
+ {
753
+ "epoch": 34.78260869565217,
754
+ "grad_norm": 2.188258409500122,
755
+ "learning_rate": 0.0,
756
+ "loss": 0.2685,
757
+ "step": 4000
758
+ },
759
+ {
760
+ "epoch": 34.78260869565217,
761
+ "eval_accuracy": 0.939945652173913,
762
+ "eval_f1_score": 0.9222328968070977,
763
+ "eval_loss": 0.46605220437049866,
764
+ "eval_precision": 0.9145504342331765,
765
+ "eval_recall": 0.9304139750671819,
766
+ "eval_runtime": 6.9734,
767
+ "eval_samples_per_second": 527.718,
768
+ "eval_steps_per_second": 8.317,
769
+ "step": 4000
770
+ },
771
+ {
772
+ "epoch": 34.78260869565217,
773
+ "step": 4000,
774
+ "total_flos": 8542898522220600.0,
775
+ "train_loss": 0.44249137926101684,
776
+ "train_runtime": 3485.9803,
777
+ "train_samples_per_second": 146.874,
778
+ "train_steps_per_second": 1.147
779
  }
780
  ],
781
  "logging_steps": 100,
 
783
  "num_input_tokens_seen": 0,
784
  "num_train_epochs": 35,
785
  "save_steps": 100,
786
+ "total_flos": 8542898522220600.0,
787
  "train_batch_size": 64,
788
  "trial_name": null,
789
  "trial_params": null