Augusto777 committed on
Commit
da8be71
·
verified ·
1 Parent(s): f1397e3

End of training

Browse files
README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
- value: 0.8387096774193549
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,8 +31,8 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 0.5844
35
- - Accuracy: 0.8387
36
 
37
  ## Model description
38
 
 
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
+ value: 0.8548387096774194
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
+ - Loss: 0.4956
35
+ - Accuracy: 0.8548
36
 
37
  ## Model description
38
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 35.56,
3
- "eval_accuracy": 0.45161290322580644,
4
- "eval_loss": 66.47408294677734,
5
- "eval_runtime": 2.0945,
6
- "eval_samples_per_second": 29.601,
7
- "eval_steps_per_second": 1.91,
8
- "train_loss": 1.2252901017665863,
9
- "train_runtime": 452.9974,
10
- "train_samples_per_second": 25.431,
11
  "train_steps_per_second": 0.353
12
  }
 
1
  {
2
  "epoch": 35.56,
3
+ "eval_accuracy": 0.8548387096774194,
4
+ "eval_loss": 0.4955712854862213,
5
+ "eval_runtime": 2.1715,
6
+ "eval_samples_per_second": 28.552,
7
+ "eval_steps_per_second": 1.842,
8
+ "train_loss": 0.5008050501346588,
9
+ "train_runtime": 452.9272,
10
+ "train_samples_per_second": 25.435,
11
  "train_steps_per_second": 0.353
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 35.56,
3
- "eval_accuracy": 0.45161290322580644,
4
- "eval_loss": 66.47408294677734,
5
- "eval_runtime": 2.0945,
6
- "eval_samples_per_second": 29.601,
7
- "eval_steps_per_second": 1.91
8
  }
 
1
  {
2
  "epoch": 35.56,
3
+ "eval_accuracy": 0.8548387096774194,
4
+ "eval_loss": 0.4955712854862213,
5
+ "eval_runtime": 2.1715,
6
+ "eval_samples_per_second": 28.552,
7
+ "eval_steps_per_second": 1.842
8
  }
runs/Dec02_12-41-42_DESKTOP-SKBE9FB/events.out.tfevents.1733165371.DESKTOP-SKBE9FB.6224.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bb228b70799a7a3b1e86100c383228f81d2d654bd37e209f83446403322517
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 35.56,
3
- "train_loss": 1.2252901017665863,
4
- "train_runtime": 452.9974,
5
- "train_samples_per_second": 25.431,
6
  "train_steps_per_second": 0.353
7
  }
 
1
  {
2
  "epoch": 35.56,
3
+ "train_loss": 0.5008050501346588,
4
+ "train_runtime": 452.9272,
5
+ "train_samples_per_second": 25.435,
6
  "train_steps_per_second": 0.353
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.45161290322580644,
3
- "best_model_checkpoint": "swiftformer-xs-OT\\checkpoint-13",
4
  "epoch": 35.55555555555556,
5
  "eval_steps": 500,
6
  "global_step": 160,
@@ -10,431 +10,431 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.89,
13
- "eval_accuracy": 0.41935483870967744,
14
- "eval_loss": 4.055167198181152,
15
- "eval_runtime": 2.037,
16
- "eval_samples_per_second": 30.437,
17
- "eval_steps_per_second": 1.964,
18
  "step": 4
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.3548387096774194,
23
- "eval_loss": 11.170184135437012,
24
- "eval_runtime": 2.4791,
25
- "eval_samples_per_second": 25.009,
26
- "eval_steps_per_second": 1.613,
27
  "step": 9
28
  },
29
  {
30
  "epoch": 2.22,
31
- "learning_rate": 0.014240506329113924,
32
- "loss": 1.5629,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 2.89,
37
- "eval_accuracy": 0.45161290322580644,
38
- "eval_loss": 66.47408294677734,
39
- "eval_runtime": 2.171,
40
- "eval_samples_per_second": 28.558,
41
- "eval_steps_per_second": 1.842,
42
  "step": 13
43
  },
44
  {
45
  "epoch": 4.0,
46
- "eval_accuracy": 0.3709677419354839,
47
- "eval_loss": 13.566208839416504,
48
- "eval_runtime": 2.1335,
49
- "eval_samples_per_second": 29.06,
50
- "eval_steps_per_second": 1.875,
51
  "step": 18
52
  },
53
  {
54
  "epoch": 4.44,
55
- "learning_rate": 0.013291139240506327,
56
- "loss": 1.2491,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 4.89,
61
- "eval_accuracy": 0.3548387096774194,
62
- "eval_loss": 2.8210699558258057,
63
- "eval_runtime": 1.8659,
64
- "eval_samples_per_second": 33.227,
65
- "eval_steps_per_second": 2.144,
66
  "step": 22
67
  },
68
  {
69
  "epoch": 6.0,
70
- "eval_accuracy": 0.3548387096774194,
71
- "eval_loss": 1.384170651435852,
72
- "eval_runtime": 2.0735,
73
- "eval_samples_per_second": 29.901,
74
- "eval_steps_per_second": 1.929,
75
  "step": 27
76
  },
77
  {
78
  "epoch": 6.67,
79
- "learning_rate": 0.012341772151898734,
80
- "loss": 1.2501,
81
  "step": 30
82
  },
83
  {
84
  "epoch": 6.89,
85
- "eval_accuracy": 0.3064516129032258,
86
- "eval_loss": 1.3205060958862305,
87
- "eval_runtime": 1.9145,
88
- "eval_samples_per_second": 32.385,
89
- "eval_steps_per_second": 2.089,
90
  "step": 31
91
  },
92
  {
93
  "epoch": 8.0,
94
- "eval_accuracy": 0.3548387096774194,
95
- "eval_loss": 2.1143109798431396,
96
- "eval_runtime": 2.154,
97
- "eval_samples_per_second": 28.783,
98
- "eval_steps_per_second": 1.857,
99
  "step": 36
100
  },
101
  {
102
  "epoch": 8.89,
103
- "learning_rate": 0.01139240506329114,
104
- "loss": 1.2154,
105
  "step": 40
106
  },
107
  {
108
  "epoch": 8.89,
109
- "eval_accuracy": 0.45161290322580644,
110
- "eval_loss": 1.6988922357559204,
111
- "eval_runtime": 1.9385,
112
- "eval_samples_per_second": 31.984,
113
- "eval_steps_per_second": 2.063,
114
  "step": 40
115
  },
116
  {
117
  "epoch": 10.0,
118
- "eval_accuracy": 0.4032258064516129,
119
- "eval_loss": 1.3945496082305908,
120
- "eval_runtime": 1.9375,
121
- "eval_samples_per_second": 32.001,
122
- "eval_steps_per_second": 2.065,
123
  "step": 45
124
  },
125
  {
126
  "epoch": 10.89,
127
- "eval_accuracy": 0.4032258064516129,
128
- "eval_loss": 1.3830032348632812,
129
- "eval_runtime": 2.0825,
130
- "eval_samples_per_second": 29.772,
131
- "eval_steps_per_second": 1.921,
132
  "step": 49
133
  },
134
  {
135
  "epoch": 11.11,
136
- "learning_rate": 0.010443037974683544,
137
- "loss": 1.1922,
138
  "step": 50
139
  },
140
  {
141
  "epoch": 12.0,
142
- "eval_accuracy": 0.3870967741935484,
143
- "eval_loss": 1.349541187286377,
144
- "eval_runtime": 1.9725,
145
- "eval_samples_per_second": 31.433,
146
- "eval_steps_per_second": 2.028,
147
  "step": 54
148
  },
149
  {
150
  "epoch": 12.89,
151
- "eval_accuracy": 0.43548387096774194,
152
- "eval_loss": 1.203187346458435,
153
- "eval_runtime": 2.0645,
154
- "eval_samples_per_second": 30.031,
155
- "eval_steps_per_second": 1.938,
156
  "step": 58
157
  },
158
  {
159
  "epoch": 13.33,
160
- "learning_rate": 0.00949367088607595,
161
- "loss": 1.1879,
162
  "step": 60
163
  },
164
  {
165
  "epoch": 14.0,
166
- "eval_accuracy": 0.45161290322580644,
167
- "eval_loss": 1.2532857656478882,
168
- "eval_runtime": 2.0885,
169
- "eval_samples_per_second": 29.686,
170
- "eval_steps_per_second": 1.915,
171
  "step": 63
172
  },
173
  {
174
  "epoch": 14.89,
175
- "eval_accuracy": 0.45161290322580644,
176
- "eval_loss": 1.3339285850524902,
177
- "eval_runtime": 2.0275,
178
- "eval_samples_per_second": 30.58,
179
- "eval_steps_per_second": 1.973,
180
  "step": 67
181
  },
182
  {
183
  "epoch": 15.56,
184
- "learning_rate": 0.008544303797468355,
185
- "loss": 1.2114,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 16.0,
190
- "eval_accuracy": 0.3225806451612903,
191
- "eval_loss": 1.2691409587860107,
192
- "eval_runtime": 2.0745,
193
- "eval_samples_per_second": 29.887,
194
- "eval_steps_per_second": 1.928,
195
  "step": 72
196
  },
197
  {
198
  "epoch": 16.89,
199
- "eval_accuracy": 0.3709677419354839,
200
- "eval_loss": 1.2870497703552246,
201
- "eval_runtime": 1.9805,
202
- "eval_samples_per_second": 31.306,
203
- "eval_steps_per_second": 2.02,
204
  "step": 76
205
  },
206
  {
207
  "epoch": 17.78,
208
- "learning_rate": 0.00759493670886076,
209
- "loss": 1.2266,
210
  "step": 80
211
  },
212
  {
213
  "epoch": 18.0,
214
- "eval_accuracy": 0.45161290322580644,
215
- "eval_loss": 1.1817296743392944,
216
- "eval_runtime": 2.064,
217
- "eval_samples_per_second": 30.039,
218
- "eval_steps_per_second": 1.938,
219
  "step": 81
220
  },
221
  {
222
  "epoch": 18.89,
223
- "eval_accuracy": 0.45161290322580644,
224
- "eval_loss": 1.2077744007110596,
225
- "eval_runtime": 2.0575,
226
- "eval_samples_per_second": 30.134,
227
- "eval_steps_per_second": 1.944,
228
  "step": 85
229
  },
230
  {
231
  "epoch": 20.0,
232
- "learning_rate": 0.0066455696202531635,
233
- "loss": 1.2392,
234
  "step": 90
235
  },
236
  {
237
  "epoch": 20.0,
238
- "eval_accuracy": 0.45161290322580644,
239
- "eval_loss": 1.2126984596252441,
240
- "eval_runtime": 2.1705,
241
- "eval_samples_per_second": 28.564,
242
- "eval_steps_per_second": 1.843,
243
  "step": 90
244
  },
245
  {
246
  "epoch": 20.89,
247
- "eval_accuracy": 0.43548387096774194,
248
- "eval_loss": 1.2361472845077515,
249
- "eval_runtime": 1.957,
250
- "eval_samples_per_second": 31.682,
251
- "eval_steps_per_second": 2.044,
252
  "step": 94
253
  },
254
  {
255
  "epoch": 22.0,
256
- "eval_accuracy": 0.45161290322580644,
257
- "eval_loss": 1.5839189291000366,
258
- "eval_runtime": 2.1675,
259
- "eval_samples_per_second": 28.604,
260
- "eval_steps_per_second": 1.845,
261
  "step": 99
262
  },
263
  {
264
  "epoch": 22.22,
265
- "learning_rate": 0.00569620253164557,
266
- "loss": 1.228,
267
  "step": 100
268
  },
269
  {
270
  "epoch": 22.89,
271
- "eval_accuracy": 0.3870967741935484,
272
- "eval_loss": 1.2531583309173584,
273
- "eval_runtime": 2.16,
274
- "eval_samples_per_second": 28.704,
275
- "eval_steps_per_second": 1.852,
276
  "step": 103
277
  },
278
  {
279
  "epoch": 24.0,
280
- "eval_accuracy": 0.3709677419354839,
281
- "eval_loss": 1.3878097534179688,
282
- "eval_runtime": 2.122,
283
- "eval_samples_per_second": 29.218,
284
- "eval_steps_per_second": 1.885,
285
  "step": 108
286
  },
287
  {
288
  "epoch": 24.44,
289
- "learning_rate": 0.004746835443037975,
290
- "loss": 1.197,
291
  "step": 110
292
  },
293
  {
294
  "epoch": 24.89,
295
- "eval_accuracy": 0.3548387096774194,
296
- "eval_loss": 1.31118905544281,
297
- "eval_runtime": 2.163,
298
- "eval_samples_per_second": 28.664,
299
- "eval_steps_per_second": 1.849,
300
  "step": 112
301
  },
302
  {
303
  "epoch": 26.0,
304
- "eval_accuracy": 0.45161290322580644,
305
- "eval_loss": 1.1860616207122803,
306
- "eval_runtime": 2.1025,
307
- "eval_samples_per_second": 29.489,
308
- "eval_steps_per_second": 1.902,
309
  "step": 117
310
  },
311
  {
312
  "epoch": 26.67,
313
- "learning_rate": 0.00379746835443038,
314
- "loss": 1.1873,
315
  "step": 120
316
  },
317
  {
318
  "epoch": 26.89,
319
- "eval_accuracy": 0.3870967741935484,
320
- "eval_loss": 1.1917904615402222,
321
- "eval_runtime": 2.1285,
322
- "eval_samples_per_second": 29.129,
323
- "eval_steps_per_second": 1.879,
324
  "step": 121
325
  },
326
  {
327
  "epoch": 28.0,
328
- "eval_accuracy": 0.3709677419354839,
329
- "eval_loss": 1.2353166341781616,
330
- "eval_runtime": 2.158,
331
- "eval_samples_per_second": 28.73,
332
- "eval_steps_per_second": 1.854,
333
  "step": 126
334
  },
335
  {
336
  "epoch": 28.89,
337
- "learning_rate": 0.002848101265822785,
338
- "loss": 1.1681,
339
  "step": 130
340
  },
341
  {
342
  "epoch": 28.89,
343
- "eval_accuracy": 0.3709677419354839,
344
- "eval_loss": 1.1556626558303833,
345
- "eval_runtime": 2.037,
346
- "eval_samples_per_second": 30.437,
347
- "eval_steps_per_second": 1.964,
348
  "step": 130
349
  },
350
  {
351
  "epoch": 30.0,
352
- "eval_accuracy": 0.3709677419354839,
353
- "eval_loss": 1.1894261837005615,
354
- "eval_runtime": 2.232,
355
- "eval_samples_per_second": 27.778,
356
- "eval_steps_per_second": 1.792,
357
  "step": 135
358
  },
359
  {
360
  "epoch": 30.89,
361
- "eval_accuracy": 0.3709677419354839,
362
- "eval_loss": 1.2173370122909546,
363
- "eval_runtime": 2.0738,
364
- "eval_samples_per_second": 29.897,
365
- "eval_steps_per_second": 1.929,
366
  "step": 139
367
  },
368
  {
369
  "epoch": 31.11,
370
- "learning_rate": 0.00189873417721519,
371
- "loss": 1.1705,
372
  "step": 140
373
  },
374
  {
375
  "epoch": 32.0,
376
- "eval_accuracy": 0.4032258064516129,
377
- "eval_loss": 1.2082428932189941,
378
- "eval_runtime": 1.911,
379
- "eval_samples_per_second": 32.444,
380
- "eval_steps_per_second": 2.093,
381
  "step": 144
382
  },
383
  {
384
  "epoch": 32.89,
385
- "eval_accuracy": 0.41935483870967744,
386
- "eval_loss": 1.2061160802841187,
387
- "eval_runtime": 1.952,
388
- "eval_samples_per_second": 31.763,
389
- "eval_steps_per_second": 2.049,
390
  "step": 148
391
  },
392
  {
393
  "epoch": 33.33,
394
- "learning_rate": 0.000949367088607595,
395
- "loss": 1.1767,
396
  "step": 150
397
  },
398
  {
399
  "epoch": 34.0,
400
- "eval_accuracy": 0.41935483870967744,
401
- "eval_loss": 1.208241581916809,
402
- "eval_runtime": 1.9655,
403
- "eval_samples_per_second": 31.545,
404
- "eval_steps_per_second": 2.035,
405
  "step": 153
406
  },
407
  {
408
  "epoch": 34.89,
409
- "eval_accuracy": 0.41935483870967744,
410
- "eval_loss": 1.2024096250534058,
411
- "eval_runtime": 1.9505,
412
- "eval_samples_per_second": 31.787,
413
- "eval_steps_per_second": 2.051,
414
  "step": 157
415
  },
416
  {
417
  "epoch": 35.56,
418
  "learning_rate": 0.0,
419
- "loss": 1.1424,
420
  "step": 160
421
  },
422
  {
423
  "epoch": 35.56,
424
- "eval_accuracy": 0.43548387096774194,
425
- "eval_loss": 1.1981048583984375,
426
- "eval_runtime": 2.016,
427
- "eval_samples_per_second": 30.754,
428
- "eval_steps_per_second": 1.984,
429
  "step": 160
430
  },
431
  {
432
  "epoch": 35.56,
433
  "step": 160,
434
  "total_flos": 2.807020017156096e+16,
435
- "train_loss": 1.2252901017665863,
436
- "train_runtime": 452.9974,
437
- "train_samples_per_second": 25.431,
438
  "train_steps_per_second": 0.353
439
  }
440
  ],
 
1
  {
2
+ "best_metric": 0.8548387096774194,
3
+ "best_model_checkpoint": "swiftformer-xs-OT\\checkpoint-139",
4
  "epoch": 35.55555555555556,
5
  "eval_steps": 500,
6
  "global_step": 160,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.89,
13
+ "eval_accuracy": 0.5161290322580645,
14
+ "eval_loss": 1.3804326057434082,
15
+ "eval_runtime": 2.0395,
16
+ "eval_samples_per_second": 30.4,
17
+ "eval_steps_per_second": 1.961,
18
  "step": 4
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.532258064516129,
23
+ "eval_loss": 1.2554447650909424,
24
+ "eval_runtime": 1.9685,
25
+ "eval_samples_per_second": 31.497,
26
+ "eval_steps_per_second": 2.032,
27
  "step": 9
28
  },
29
  {
30
  "epoch": 2.22,
31
+ "learning_rate": 0.0009375,
32
+ "loss": 1.3469,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 2.89,
37
+ "eval_accuracy": 0.6612903225806451,
38
+ "eval_loss": 0.9724773168563843,
39
+ "eval_runtime": 1.9795,
40
+ "eval_samples_per_second": 31.321,
41
+ "eval_steps_per_second": 2.021,
42
  "step": 13
43
  },
44
  {
45
  "epoch": 4.0,
46
+ "eval_accuracy": 0.7580645161290323,
47
+ "eval_loss": 0.7085855007171631,
48
+ "eval_runtime": 2.3836,
49
+ "eval_samples_per_second": 26.011,
50
+ "eval_steps_per_second": 1.678,
51
  "step": 18
52
  },
53
  {
54
  "epoch": 4.44,
55
+ "learning_rate": 0.0014583333333333334,
56
+ "loss": 0.9831,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 4.89,
61
+ "eval_accuracy": 0.7258064516129032,
62
+ "eval_loss": 0.8855839371681213,
63
+ "eval_runtime": 2.156,
64
+ "eval_samples_per_second": 28.757,
65
+ "eval_steps_per_second": 1.855,
66
  "step": 22
67
  },
68
  {
69
  "epoch": 6.0,
70
+ "eval_accuracy": 0.7580645161290323,
71
+ "eval_loss": 0.7723659873008728,
72
+ "eval_runtime": 1.9627,
73
+ "eval_samples_per_second": 31.588,
74
+ "eval_steps_per_second": 2.038,
75
  "step": 27
76
  },
77
  {
78
  "epoch": 6.67,
79
+ "learning_rate": 0.0013541666666666667,
80
+ "loss": 0.7441,
81
  "step": 30
82
  },
83
  {
84
  "epoch": 6.89,
85
+ "eval_accuracy": 0.7258064516129032,
86
+ "eval_loss": 0.819038450717926,
87
+ "eval_runtime": 1.979,
88
+ "eval_samples_per_second": 31.33,
89
+ "eval_steps_per_second": 2.021,
90
  "step": 31
91
  },
92
  {
93
  "epoch": 8.0,
94
+ "eval_accuracy": 0.7741935483870968,
95
+ "eval_loss": 0.6897338628768921,
96
+ "eval_runtime": 1.8919,
97
+ "eval_samples_per_second": 32.77,
98
+ "eval_steps_per_second": 2.114,
99
  "step": 36
100
  },
101
  {
102
  "epoch": 8.89,
103
+ "learning_rate": 0.00125,
104
+ "loss": 0.6939,
105
  "step": 40
106
  },
107
  {
108
  "epoch": 8.89,
109
+ "eval_accuracy": 0.7258064516129032,
110
+ "eval_loss": 0.6599262356758118,
111
+ "eval_runtime": 1.9435,
112
+ "eval_samples_per_second": 31.902,
113
+ "eval_steps_per_second": 2.058,
114
  "step": 40
115
  },
116
  {
117
  "epoch": 10.0,
118
+ "eval_accuracy": 0.7741935483870968,
119
+ "eval_loss": 0.6288474798202515,
120
+ "eval_runtime": 2.1175,
121
+ "eval_samples_per_second": 29.28,
122
+ "eval_steps_per_second": 1.889,
123
  "step": 45
124
  },
125
  {
126
  "epoch": 10.89,
127
+ "eval_accuracy": 0.7580645161290323,
128
+ "eval_loss": 0.6333299279212952,
129
+ "eval_runtime": 2.055,
130
+ "eval_samples_per_second": 30.17,
131
+ "eval_steps_per_second": 1.946,
132
  "step": 49
133
  },
134
  {
135
  "epoch": 11.11,
136
+ "learning_rate": 0.0011458333333333333,
137
+ "loss": 0.5861,
138
  "step": 50
139
  },
140
  {
141
  "epoch": 12.0,
142
+ "eval_accuracy": 0.7741935483870968,
143
+ "eval_loss": 0.6206099987030029,
144
+ "eval_runtime": 2.139,
145
+ "eval_samples_per_second": 28.985,
146
+ "eval_steps_per_second": 1.87,
147
  "step": 54
148
  },
149
  {
150
  "epoch": 12.89,
151
+ "eval_accuracy": 0.7903225806451613,
152
+ "eval_loss": 0.5262896418571472,
153
+ "eval_runtime": 2.0025,
154
+ "eval_samples_per_second": 30.961,
155
+ "eval_steps_per_second": 1.998,
156
  "step": 58
157
  },
158
  {
159
  "epoch": 13.33,
160
+ "learning_rate": 0.0010416666666666667,
161
+ "loss": 0.5018,
162
  "step": 60
163
  },
164
  {
165
  "epoch": 14.0,
166
+ "eval_accuracy": 0.8064516129032258,
167
+ "eval_loss": 0.583601176738739,
168
+ "eval_runtime": 2.0815,
169
+ "eval_samples_per_second": 29.786,
170
+ "eval_steps_per_second": 1.922,
171
  "step": 63
172
  },
173
  {
174
  "epoch": 14.89,
175
+ "eval_accuracy": 0.7419354838709677,
176
+ "eval_loss": 0.6125115156173706,
177
+ "eval_runtime": 1.948,
178
+ "eval_samples_per_second": 31.828,
179
+ "eval_steps_per_second": 2.053,
180
  "step": 67
181
  },
182
  {
183
  "epoch": 15.56,
184
+ "learning_rate": 0.0009375,
185
+ "loss": 0.4642,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 16.0,
190
+ "eval_accuracy": 0.8064516129032258,
191
+ "eval_loss": 0.5431403517723083,
192
+ "eval_runtime": 1.921,
193
+ "eval_samples_per_second": 32.276,
194
+ "eval_steps_per_second": 2.082,
195
  "step": 72
196
  },
197
  {
198
  "epoch": 16.89,
199
+ "eval_accuracy": 0.8387096774193549,
200
+ "eval_loss": 0.5893498063087463,
201
+ "eval_runtime": 1.9645,
202
+ "eval_samples_per_second": 31.561,
203
+ "eval_steps_per_second": 2.036,
204
  "step": 76
205
  },
206
  {
207
  "epoch": 17.78,
208
+ "learning_rate": 0.0008333333333333334,
209
+ "loss": 0.4064,
210
  "step": 80
211
  },
212
  {
213
  "epoch": 18.0,
214
+ "eval_accuracy": 0.8064516129032258,
215
+ "eval_loss": 0.49968525767326355,
216
+ "eval_runtime": 1.8915,
217
+ "eval_samples_per_second": 32.779,
218
+ "eval_steps_per_second": 2.115,
219
  "step": 81
220
  },
221
  {
222
  "epoch": 18.89,
223
+ "eval_accuracy": 0.7741935483870968,
224
+ "eval_loss": 0.5474066138267517,
225
+ "eval_runtime": 1.968,
226
+ "eval_samples_per_second": 31.505,
227
+ "eval_steps_per_second": 2.033,
228
  "step": 85
229
  },
230
  {
231
  "epoch": 20.0,
232
+ "learning_rate": 0.0007291666666666667,
233
+ "loss": 0.4275,
234
  "step": 90
235
  },
236
  {
237
  "epoch": 20.0,
238
+ "eval_accuracy": 0.7903225806451613,
239
+ "eval_loss": 0.6748386025428772,
240
+ "eval_runtime": 1.963,
241
+ "eval_samples_per_second": 31.585,
242
+ "eval_steps_per_second": 2.038,
243
  "step": 90
244
  },
245
  {
246
  "epoch": 20.89,
247
+ "eval_accuracy": 0.7580645161290323,
248
+ "eval_loss": 0.6369444131851196,
249
+ "eval_runtime": 1.98,
250
+ "eval_samples_per_second": 31.314,
251
+ "eval_steps_per_second": 2.02,
252
  "step": 94
253
  },
254
  {
255
  "epoch": 22.0,
256
+ "eval_accuracy": 0.7741935483870968,
257
+ "eval_loss": 0.5609545111656189,
258
+ "eval_runtime": 1.974,
259
+ "eval_samples_per_second": 31.409,
260
+ "eval_steps_per_second": 2.026,
261
  "step": 99
262
  },
263
  {
264
  "epoch": 22.22,
265
+ "learning_rate": 0.000625,
266
+ "loss": 0.373,
267
  "step": 100
268
  },
269
  {
270
  "epoch": 22.89,
271
+ "eval_accuracy": 0.7903225806451613,
272
+ "eval_loss": 0.5259799957275391,
273
+ "eval_runtime": 1.9815,
274
+ "eval_samples_per_second": 31.29,
275
+ "eval_steps_per_second": 2.019,
276
  "step": 103
277
  },
278
  {
279
  "epoch": 24.0,
280
+ "eval_accuracy": 0.8387096774193549,
281
+ "eval_loss": 0.5416117310523987,
282
+ "eval_runtime": 2.1095,
283
+ "eval_samples_per_second": 29.391,
284
+ "eval_steps_per_second": 1.896,
285
  "step": 108
286
  },
287
  {
288
  "epoch": 24.44,
289
+ "learning_rate": 0.0005208333333333333,
290
+ "loss": 0.2931,
291
  "step": 110
292
  },
293
  {
294
  "epoch": 24.89,
295
+ "eval_accuracy": 0.8387096774193549,
296
+ "eval_loss": 0.5146054029464722,
297
+ "eval_runtime": 2.1155,
298
+ "eval_samples_per_second": 29.308,
299
+ "eval_steps_per_second": 1.891,
300
  "step": 112
301
  },
302
  {
303
  "epoch": 26.0,
304
+ "eval_accuracy": 0.7741935483870968,
305
+ "eval_loss": 0.5180203914642334,
306
+ "eval_runtime": 1.8704,
307
+ "eval_samples_per_second": 33.147,
308
+ "eval_steps_per_second": 2.139,
309
  "step": 117
310
  },
311
  {
312
  "epoch": 26.67,
313
+ "learning_rate": 0.0004166666666666667,
314
+ "loss": 0.3135,
315
  "step": 120
316
  },
317
  {
318
  "epoch": 26.89,
319
+ "eval_accuracy": 0.8225806451612904,
320
+ "eval_loss": 0.5169466733932495,
321
+ "eval_runtime": 2.0975,
322
+ "eval_samples_per_second": 29.559,
323
+ "eval_steps_per_second": 1.907,
324
  "step": 121
325
  },
326
  {
327
  "epoch": 28.0,
328
+ "eval_accuracy": 0.8387096774193549,
329
+ "eval_loss": 0.5491407513618469,
330
+ "eval_runtime": 2.1065,
331
+ "eval_samples_per_second": 29.433,
332
+ "eval_steps_per_second": 1.899,
333
  "step": 126
334
  },
335
  {
336
  "epoch": 28.89,
337
+ "learning_rate": 0.0003125,
338
+ "loss": 0.2342,
339
  "step": 130
340
  },
341
  {
342
  "epoch": 28.89,
343
+ "eval_accuracy": 0.8387096774193549,
344
+ "eval_loss": 0.5384820699691772,
345
+ "eval_runtime": 2.084,
346
+ "eval_samples_per_second": 29.751,
347
+ "eval_steps_per_second": 1.919,
348
  "step": 130
349
  },
350
  {
351
  "epoch": 30.0,
352
+ "eval_accuracy": 0.8387096774193549,
353
+ "eval_loss": 0.5455949306488037,
354
+ "eval_runtime": 1.9334,
355
+ "eval_samples_per_second": 32.067,
356
+ "eval_steps_per_second": 2.069,
357
  "step": 135
358
  },
359
  {
360
  "epoch": 30.89,
361
+ "eval_accuracy": 0.8548387096774194,
362
+ "eval_loss": 0.4955712854862213,
363
+ "eval_runtime": 2.2074,
364
+ "eval_samples_per_second": 28.088,
365
+ "eval_steps_per_second": 1.812,
366
  "step": 139
367
  },
368
  {
369
  "epoch": 31.11,
370
+ "learning_rate": 0.00020833333333333335,
371
+ "loss": 0.2411,
372
  "step": 140
373
  },
374
  {
375
  "epoch": 32.0,
376
+ "eval_accuracy": 0.8225806451612904,
377
+ "eval_loss": 0.5254048109054565,
378
+ "eval_runtime": 2.3185,
379
+ "eval_samples_per_second": 26.741,
380
+ "eval_steps_per_second": 1.725,
381
  "step": 144
382
  },
383
  {
384
  "epoch": 32.89,
385
+ "eval_accuracy": 0.8387096774193549,
386
+ "eval_loss": 0.5533136129379272,
387
+ "eval_runtime": 1.8899,
388
+ "eval_samples_per_second": 32.805,
389
+ "eval_steps_per_second": 2.116,
390
  "step": 148
391
  },
392
  {
393
  "epoch": 33.33,
394
+ "learning_rate": 0.00010416666666666667,
395
+ "loss": 0.2135,
396
  "step": 150
397
  },
398
  {
399
  "epoch": 34.0,
400
+ "eval_accuracy": 0.8387096774193549,
401
+ "eval_loss": 0.5612671971321106,
402
+ "eval_runtime": 2.0095,
403
+ "eval_samples_per_second": 30.854,
404
+ "eval_steps_per_second": 1.991,
405
  "step": 153
406
  },
407
  {
408
  "epoch": 34.89,
409
+ "eval_accuracy": 0.8225806451612904,
410
+ "eval_loss": 0.5748429894447327,
411
+ "eval_runtime": 2.0385,
412
+ "eval_samples_per_second": 30.415,
413
+ "eval_steps_per_second": 1.962,
414
  "step": 157
415
  },
416
  {
417
  "epoch": 35.56,
418
  "learning_rate": 0.0,
419
+ "loss": 0.1904,
420
  "step": 160
421
  },
422
  {
423
  "epoch": 35.56,
424
+ "eval_accuracy": 0.8387096774193549,
425
+ "eval_loss": 0.5843761563301086,
426
+ "eval_runtime": 2.02,
427
+ "eval_samples_per_second": 30.694,
428
+ "eval_steps_per_second": 1.98,
429
  "step": 160
430
  },
431
  {
432
  "epoch": 35.56,
433
  "step": 160,
434
  "total_flos": 2.807020017156096e+16,
435
+ "train_loss": 0.5008050501346588,
436
+ "train_runtime": 452.9272,
437
+ "train_samples_per_second": 25.435,
438
  "train_steps_per_second": 0.353
439
  }
440
  ],