alexgrigore commited on
Commit
80c82ac
·
verified ·
1 Parent(s): b17fa83

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +10 -0
  2. test_results.json +10 -0
  3. trainer_state.json +522 -0
all_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.05,
3
+ "eval_accuracy": 0.8932038834951457,
4
+ "eval_accuracy_gunit": 0.5416666666666666,
5
+ "eval_accuracy_nothing": 1.0,
6
+ "eval_loss": 0.3490257263183594,
7
+ "eval_runtime": 13.7098,
8
+ "eval_samples_per_second": 7.513,
9
+ "eval_steps_per_second": 0.948
10
+ }
test_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.05,
3
+ "eval_accuracy": 0.8932038834951457,
4
+ "eval_accuracy_gunit": 0.5416666666666666,
5
+ "eval_accuracy_nothing": 1.0,
6
+ "eval_loss": 0.3490257263183594,
7
+ "eval_runtime": 13.7098,
8
+ "eval_samples_per_second": 7.513,
9
+ "eval_steps_per_second": 0.948
10
+ }
trainer_state.json ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9504132231404959,
3
+ "best_model_checkpoint": "videomae-base-finetuned-good-gestureUnitsV3/checkpoint-255",
4
+ "epoch": 19.05,
5
+ "eval_steps": 500,
6
+ "global_step": 340,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.029411764705882353,
13
+ "grad_norm": 9.534835815429688,
14
+ "learning_rate": 1.4705882352941177e-06,
15
+ "loss": 0.7047,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "eval_accuracy": 0.5619834710743802,
21
+ "eval_accuracy_gunit": 0.575,
22
+ "eval_accuracy_nothing": 0.5555555555555556,
23
+ "eval_loss": 0.7023928165435791,
24
+ "eval_runtime": 15.7256,
25
+ "eval_samples_per_second": 7.694,
26
+ "eval_steps_per_second": 1.017,
27
+ "step": 17
28
+ },
29
+ {
30
+ "epoch": 1.0088235294117647,
31
+ "grad_norm": 5.451850414276123,
32
+ "learning_rate": 2.9411764705882355e-06,
33
+ "loss": 0.7083,
34
+ "step": 20
35
+ },
36
+ {
37
+ "epoch": 1.0382352941176471,
38
+ "grad_norm": 5.070091724395752,
39
+ "learning_rate": 4.411764705882353e-06,
40
+ "loss": 0.6958,
41
+ "step": 30
42
+ },
43
+ {
44
+ "epoch": 1.05,
45
+ "eval_accuracy": 0.38016528925619836,
46
+ "eval_accuracy_gunit": 0.725,
47
+ "eval_accuracy_nothing": 0.20987654320987653,
48
+ "eval_loss": 0.7165102362632751,
49
+ "eval_runtime": 15.8798,
50
+ "eval_samples_per_second": 7.62,
51
+ "eval_steps_per_second": 1.008,
52
+ "step": 34
53
+ },
54
+ {
55
+ "epoch": 2.0176470588235293,
56
+ "grad_norm": 4.038697719573975,
57
+ "learning_rate": 4.901960784313726e-06,
58
+ "loss": 0.6585,
59
+ "step": 40
60
+ },
61
+ {
62
+ "epoch": 2.0470588235294116,
63
+ "grad_norm": 3.2235519886016846,
64
+ "learning_rate": 4.7385620915032685e-06,
65
+ "loss": 0.6534,
66
+ "step": 50
67
+ },
68
+ {
69
+ "epoch": 2.05,
70
+ "eval_accuracy": 0.371900826446281,
71
+ "eval_accuracy_gunit": 0.85,
72
+ "eval_accuracy_nothing": 0.13580246913580246,
73
+ "eval_loss": 0.7150475382804871,
74
+ "eval_runtime": 16.0184,
75
+ "eval_samples_per_second": 7.554,
76
+ "eval_steps_per_second": 0.999,
77
+ "step": 51
78
+ },
79
+ {
80
+ "epoch": 3.026470588235294,
81
+ "grad_norm": 4.749432563781738,
82
+ "learning_rate": 4.5751633986928105e-06,
83
+ "loss": 0.655,
84
+ "step": 60
85
+ },
86
+ {
87
+ "epoch": 3.05,
88
+ "eval_accuracy": 0.32231404958677684,
89
+ "eval_accuracy_gunit": 0.975,
90
+ "eval_accuracy_nothing": 0.0,
91
+ "eval_loss": 0.7505635619163513,
92
+ "eval_runtime": 15.7625,
93
+ "eval_samples_per_second": 7.676,
94
+ "eval_steps_per_second": 1.015,
95
+ "step": 68
96
+ },
97
+ {
98
+ "epoch": 4.0058823529411764,
99
+ "grad_norm": 6.722472190856934,
100
+ "learning_rate": 4.411764705882353e-06,
101
+ "loss": 0.6741,
102
+ "step": 70
103
+ },
104
+ {
105
+ "epoch": 4.035294117647059,
106
+ "grad_norm": 6.8286638259887695,
107
+ "learning_rate": 4.2483660130718954e-06,
108
+ "loss": 0.6545,
109
+ "step": 80
110
+ },
111
+ {
112
+ "epoch": 4.05,
113
+ "eval_accuracy": 0.4297520661157025,
114
+ "eval_accuracy_gunit": 0.95,
115
+ "eval_accuracy_nothing": 0.1728395061728395,
116
+ "eval_loss": 0.6991418600082397,
117
+ "eval_runtime": 15.5171,
118
+ "eval_samples_per_second": 7.798,
119
+ "eval_steps_per_second": 1.031,
120
+ "step": 85
121
+ },
122
+ {
123
+ "epoch": 5.014705882352941,
124
+ "grad_norm": 7.191628456115723,
125
+ "learning_rate": 4.084967320261438e-06,
126
+ "loss": 0.56,
127
+ "step": 90
128
+ },
129
+ {
130
+ "epoch": 5.044117647058823,
131
+ "grad_norm": 5.416815757751465,
132
+ "learning_rate": 3.92156862745098e-06,
133
+ "loss": 0.6321,
134
+ "step": 100
135
+ },
136
+ {
137
+ "epoch": 5.05,
138
+ "eval_accuracy": 0.47107438016528924,
139
+ "eval_accuracy_gunit": 0.95,
140
+ "eval_accuracy_nothing": 0.2345679012345679,
141
+ "eval_loss": 0.6750265955924988,
142
+ "eval_runtime": 15.9488,
143
+ "eval_samples_per_second": 7.587,
144
+ "eval_steps_per_second": 1.003,
145
+ "step": 102
146
+ },
147
+ {
148
+ "epoch": 6.023529411764706,
149
+ "grad_norm": 4.386753559112549,
150
+ "learning_rate": 3.758169934640523e-06,
151
+ "loss": 0.5883,
152
+ "step": 110
153
+ },
154
+ {
155
+ "epoch": 6.05,
156
+ "eval_accuracy": 0.743801652892562,
157
+ "eval_accuracy_gunit": 0.9,
158
+ "eval_accuracy_nothing": 0.6666666666666666,
159
+ "eval_loss": 0.6142133474349976,
160
+ "eval_runtime": 16.6481,
161
+ "eval_samples_per_second": 7.268,
162
+ "eval_steps_per_second": 0.961,
163
+ "step": 119
164
+ },
165
+ {
166
+ "epoch": 7.002941176470588,
167
+ "grad_norm": 5.5603108406066895,
168
+ "learning_rate": 3.5947712418300657e-06,
169
+ "loss": 0.5803,
170
+ "step": 120
171
+ },
172
+ {
173
+ "epoch": 7.0323529411764705,
174
+ "grad_norm": 3.9502112865448,
175
+ "learning_rate": 3.431372549019608e-06,
176
+ "loss": 0.5045,
177
+ "step": 130
178
+ },
179
+ {
180
+ "epoch": 7.05,
181
+ "eval_accuracy": 0.8181818181818182,
182
+ "eval_accuracy_gunit": 0.9,
183
+ "eval_accuracy_nothing": 0.7777777777777778,
184
+ "eval_loss": 0.549534022808075,
185
+ "eval_runtime": 15.8267,
186
+ "eval_samples_per_second": 7.645,
187
+ "eval_steps_per_second": 1.011,
188
+ "step": 136
189
+ },
190
+ {
191
+ "epoch": 8.011764705882353,
192
+ "grad_norm": 5.036988735198975,
193
+ "learning_rate": 3.2679738562091506e-06,
194
+ "loss": 0.578,
195
+ "step": 140
196
+ },
197
+ {
198
+ "epoch": 8.041176470588235,
199
+ "grad_norm": 4.750277519226074,
200
+ "learning_rate": 3.104575163398693e-06,
201
+ "loss": 0.455,
202
+ "step": 150
203
+ },
204
+ {
205
+ "epoch": 8.05,
206
+ "eval_accuracy": 0.71900826446281,
207
+ "eval_accuracy_gunit": 0.925,
208
+ "eval_accuracy_nothing": 0.6172839506172839,
209
+ "eval_loss": 0.572287917137146,
210
+ "eval_runtime": 16.0277,
211
+ "eval_samples_per_second": 7.549,
212
+ "eval_steps_per_second": 0.998,
213
+ "step": 153
214
+ },
215
+ {
216
+ "epoch": 9.020588235294118,
217
+ "grad_norm": 4.390650749206543,
218
+ "learning_rate": 2.9411764705882355e-06,
219
+ "loss": 0.382,
220
+ "step": 160
221
+ },
222
+ {
223
+ "epoch": 9.05,
224
+ "grad_norm": 12.403214454650879,
225
+ "learning_rate": 2.7777777777777783e-06,
226
+ "loss": 0.4191,
227
+ "step": 170
228
+ },
229
+ {
230
+ "epoch": 9.05,
231
+ "eval_accuracy": 0.9338842975206612,
232
+ "eval_accuracy_gunit": 0.85,
233
+ "eval_accuracy_nothing": 0.9753086419753086,
234
+ "eval_loss": 0.37085020542144775,
235
+ "eval_runtime": 19.423,
236
+ "eval_samples_per_second": 6.23,
237
+ "eval_steps_per_second": 0.824,
238
+ "step": 170
239
+ },
240
+ {
241
+ "epoch": 10.029411764705882,
242
+ "grad_norm": 8.752729415893555,
243
+ "learning_rate": 2.6143790849673208e-06,
244
+ "loss": 0.365,
245
+ "step": 180
246
+ },
247
+ {
248
+ "epoch": 10.05,
249
+ "eval_accuracy": 0.9090909090909091,
250
+ "eval_accuracy_gunit": 0.85,
251
+ "eval_accuracy_nothing": 0.9382716049382716,
252
+ "eval_loss": 0.3546590209007263,
253
+ "eval_runtime": 15.5146,
254
+ "eval_samples_per_second": 7.799,
255
+ "eval_steps_per_second": 1.031,
256
+ "step": 187
257
+ },
258
+ {
259
+ "epoch": 11.008823529411766,
260
+ "grad_norm": 6.302486896514893,
261
+ "learning_rate": 2.450980392156863e-06,
262
+ "loss": 0.3493,
263
+ "step": 190
264
+ },
265
+ {
266
+ "epoch": 11.038235294117648,
267
+ "grad_norm": 4.457513809204102,
268
+ "learning_rate": 2.2875816993464053e-06,
269
+ "loss": 0.2593,
270
+ "step": 200
271
+ },
272
+ {
273
+ "epoch": 11.05,
274
+ "eval_accuracy": 0.8925619834710744,
275
+ "eval_accuracy_gunit": 0.85,
276
+ "eval_accuracy_nothing": 0.9135802469135802,
277
+ "eval_loss": 0.36317938566207886,
278
+ "eval_runtime": 15.7061,
279
+ "eval_samples_per_second": 7.704,
280
+ "eval_steps_per_second": 1.019,
281
+ "step": 204
282
+ },
283
+ {
284
+ "epoch": 12.01764705882353,
285
+ "grad_norm": 11.817601203918457,
286
+ "learning_rate": 2.1241830065359477e-06,
287
+ "loss": 0.3111,
288
+ "step": 210
289
+ },
290
+ {
291
+ "epoch": 12.047058823529412,
292
+ "grad_norm": 6.769512176513672,
293
+ "learning_rate": 1.96078431372549e-06,
294
+ "loss": 0.2225,
295
+ "step": 220
296
+ },
297
+ {
298
+ "epoch": 12.05,
299
+ "eval_accuracy": 0.9421487603305785,
300
+ "eval_accuracy_gunit": 0.85,
301
+ "eval_accuracy_nothing": 0.9876543209876543,
302
+ "eval_loss": 0.24565516412258148,
303
+ "eval_runtime": 20.1411,
304
+ "eval_samples_per_second": 6.008,
305
+ "eval_steps_per_second": 0.794,
306
+ "step": 221
307
+ },
308
+ {
309
+ "epoch": 13.026470588235295,
310
+ "grad_norm": 3.0028862953186035,
311
+ "learning_rate": 1.7973856209150328e-06,
312
+ "loss": 0.2121,
313
+ "step": 230
314
+ },
315
+ {
316
+ "epoch": 13.05,
317
+ "eval_accuracy": 0.9256198347107438,
318
+ "eval_accuracy_gunit": 0.85,
319
+ "eval_accuracy_nothing": 0.9629629629629629,
320
+ "eval_loss": 0.26187241077423096,
321
+ "eval_runtime": 15.6231,
322
+ "eval_samples_per_second": 7.745,
323
+ "eval_steps_per_second": 1.024,
324
+ "step": 238
325
+ },
326
+ {
327
+ "epoch": 14.005882352941176,
328
+ "grad_norm": 11.049546241760254,
329
+ "learning_rate": 1.6339869281045753e-06,
330
+ "loss": 0.1814,
331
+ "step": 240
332
+ },
333
+ {
334
+ "epoch": 14.035294117647059,
335
+ "grad_norm": 13.598651885986328,
336
+ "learning_rate": 1.4705882352941177e-06,
337
+ "loss": 0.1506,
338
+ "step": 250
339
+ },
340
+ {
341
+ "epoch": 14.05,
342
+ "eval_accuracy": 0.9504132231404959,
343
+ "eval_accuracy_gunit": 0.85,
344
+ "eval_accuracy_nothing": 1.0,
345
+ "eval_loss": 0.2190253734588623,
346
+ "eval_runtime": 15.7623,
347
+ "eval_samples_per_second": 7.677,
348
+ "eval_steps_per_second": 1.015,
349
+ "step": 255
350
+ },
351
+ {
352
+ "epoch": 15.014705882352942,
353
+ "grad_norm": 5.206620693206787,
354
+ "learning_rate": 1.3071895424836604e-06,
355
+ "loss": 0.1518,
356
+ "step": 260
357
+ },
358
+ {
359
+ "epoch": 15.044117647058824,
360
+ "grad_norm": 15.521167755126953,
361
+ "learning_rate": 1.1437908496732026e-06,
362
+ "loss": 0.1335,
363
+ "step": 270
364
+ },
365
+ {
366
+ "epoch": 15.05,
367
+ "eval_accuracy": 0.9338842975206612,
368
+ "eval_accuracy_gunit": 0.85,
369
+ "eval_accuracy_nothing": 0.9753086419753086,
370
+ "eval_loss": 0.22213229537010193,
371
+ "eval_runtime": 15.6243,
372
+ "eval_samples_per_second": 7.744,
373
+ "eval_steps_per_second": 1.024,
374
+ "step": 272
375
+ },
376
+ {
377
+ "epoch": 16.023529411764706,
378
+ "grad_norm": 3.6706037521362305,
379
+ "learning_rate": 9.80392156862745e-07,
380
+ "loss": 0.1039,
381
+ "step": 280
382
+ },
383
+ {
384
+ "epoch": 16.05,
385
+ "eval_accuracy": 0.9421487603305785,
386
+ "eval_accuracy_gunit": 0.85,
387
+ "eval_accuracy_nothing": 0.9876543209876543,
388
+ "eval_loss": 0.2107107788324356,
389
+ "eval_runtime": 15.8386,
390
+ "eval_samples_per_second": 7.64,
391
+ "eval_steps_per_second": 1.01,
392
+ "step": 289
393
+ },
394
+ {
395
+ "epoch": 17.00294117647059,
396
+ "grad_norm": 2.560628890991211,
397
+ "learning_rate": 8.169934640522876e-07,
398
+ "loss": 0.1959,
399
+ "step": 290
400
+ },
401
+ {
402
+ "epoch": 17.03235294117647,
403
+ "grad_norm": 13.137368202209473,
404
+ "learning_rate": 6.535947712418302e-07,
405
+ "loss": 0.1036,
406
+ "step": 300
407
+ },
408
+ {
409
+ "epoch": 17.05,
410
+ "eval_accuracy": 0.9421487603305785,
411
+ "eval_accuracy_gunit": 0.85,
412
+ "eval_accuracy_nothing": 0.9876543209876543,
413
+ "eval_loss": 0.20594191551208496,
414
+ "eval_runtime": 15.7489,
415
+ "eval_samples_per_second": 7.683,
416
+ "eval_steps_per_second": 1.016,
417
+ "step": 306
418
+ },
419
+ {
420
+ "epoch": 18.011764705882353,
421
+ "grad_norm": 105.94062805175781,
422
+ "learning_rate": 4.901960784313725e-07,
423
+ "loss": 0.139,
424
+ "step": 310
425
+ },
426
+ {
427
+ "epoch": 18.041176470588237,
428
+ "grad_norm": 7.893646240234375,
429
+ "learning_rate": 3.267973856209151e-07,
430
+ "loss": 0.1019,
431
+ "step": 320
432
+ },
433
+ {
434
+ "epoch": 18.05,
435
+ "eval_accuracy": 0.9421487603305785,
436
+ "eval_accuracy_gunit": 0.85,
437
+ "eval_accuracy_nothing": 0.9876543209876543,
438
+ "eval_loss": 0.2062729150056839,
439
+ "eval_runtime": 15.7147,
440
+ "eval_samples_per_second": 7.7,
441
+ "eval_steps_per_second": 1.018,
442
+ "step": 323
443
+ },
444
+ {
445
+ "epoch": 19.020588235294117,
446
+ "grad_norm": 4.479135036468506,
447
+ "learning_rate": 1.6339869281045755e-07,
448
+ "loss": 0.0837,
449
+ "step": 330
450
+ },
451
+ {
452
+ "epoch": 19.05,
453
+ "grad_norm": 84.62340545654297,
454
+ "learning_rate": 0.0,
455
+ "loss": 0.1181,
456
+ "step": 340
457
+ },
458
+ {
459
+ "epoch": 19.05,
460
+ "eval_accuracy": 0.9504132231404959,
461
+ "eval_accuracy_gunit": 0.85,
462
+ "eval_accuracy_nothing": 1.0,
463
+ "eval_loss": 0.21026436984539032,
464
+ "eval_runtime": 15.8645,
465
+ "eval_samples_per_second": 7.627,
466
+ "eval_steps_per_second": 1.009,
467
+ "step": 340
468
+ },
469
+ {
470
+ "epoch": 19.05,
471
+ "step": 340,
472
+ "total_flos": 3.339453099784274e+18,
473
+ "train_loss": 0.3907703813384561,
474
+ "train_runtime": 996.3737,
475
+ "train_samples_per_second": 2.73,
476
+ "train_steps_per_second": 0.341
477
+ },
478
+ {
479
+ "epoch": 19.05,
480
+ "eval_accuracy": 0.8932038834951457,
481
+ "eval_accuracy_gunit": 0.5416666666666666,
482
+ "eval_accuracy_nothing": 1.0,
483
+ "eval_loss": 0.34902578592300415,
484
+ "eval_runtime": 14.5221,
485
+ "eval_samples_per_second": 7.093,
486
+ "eval_steps_per_second": 0.895,
487
+ "step": 340
488
+ },
489
+ {
490
+ "epoch": 19.05,
491
+ "eval_accuracy": 0.8932038834951457,
492
+ "eval_accuracy_gunit": 0.5416666666666666,
493
+ "eval_accuracy_nothing": 1.0,
494
+ "eval_loss": 0.3490257263183594,
495
+ "eval_runtime": 13.7098,
496
+ "eval_samples_per_second": 7.513,
497
+ "eval_steps_per_second": 0.948,
498
+ "step": 340
499
+ }
500
+ ],
501
+ "logging_steps": 10,
502
+ "max_steps": 340,
503
+ "num_input_tokens_seen": 0,
504
+ "num_train_epochs": 9223372036854775807,
505
+ "save_steps": 500,
506
+ "stateful_callbacks": {
507
+ "TrainerControl": {
508
+ "args": {
509
+ "should_epoch_stop": false,
510
+ "should_evaluate": false,
511
+ "should_log": false,
512
+ "should_save": true,
513
+ "should_training_stop": true
514
+ },
515
+ "attributes": {}
516
+ }
517
+ },
518
+ "total_flos": 3.339453099784274e+18,
519
+ "train_batch_size": 8,
520
+ "trial_name": null,
521
+ "trial_params": null
522
+ }