PergaZuZ committed on
Commit
3f786e6
·
verified ·
1 Parent(s): 9bbf705

End of training

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +6 -6
  3. test_results.json +6 -6
  4. trainer_state.json +149 -261
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [MCG-NJU/videomae-base](https://huggingface.co/MCG-NJU/videomae-base) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2252
22
  - Accuracy: 0.9290
23
 
24
  ## Model description
 
18
 
19
  This model is a fine-tuned version of [MCG-NJU/videomae-base](https://huggingface.co/MCG-NJU/videomae-base) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.2267
22
  - Accuracy: 0.9290
23
 
24
  ## Model description
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.113333333333333,
3
- "eval_accuracy": 0.9548387096774194,
4
- "eval_loss": 0.1593276411294937,
5
- "eval_runtime": 16.3684,
6
- "eval_samples_per_second": 9.469,
7
- "eval_steps_per_second": 1.222
8
  }
 
1
  {
2
+ "epoch": 7.101351351351352,
3
+ "eval_accuracy": 0.9290322580645162,
4
+ "eval_loss": 0.2266511768102646,
5
+ "eval_runtime": 13.6046,
6
+ "eval_samples_per_second": 11.393,
7
+ "eval_steps_per_second": 0.735
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.113333333333333,
3
- "eval_accuracy": 0.9548387096774194,
4
- "eval_loss": 0.1593276411294937,
5
- "eval_runtime": 16.3684,
6
- "eval_samples_per_second": 9.469,
7
- "eval_steps_per_second": 1.222
8
  }
 
1
  {
2
+ "epoch": 7.101351351351352,
3
+ "eval_accuracy": 0.9290322580645162,
4
+ "eval_loss": 0.2266511768102646,
5
+ "eval_runtime": 13.6046,
6
+ "eval_samples_per_second": 11.393,
7
+ "eval_steps_per_second": 0.735
8
  }
trainer_state.json CHANGED
@@ -1,325 +1,213 @@
1
  {
2
- "best_metric": 0.9714285714285714,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-300",
4
- "epoch": 7.113333333333333,
5
  "eval_steps": 500,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03333333333333333,
13
- "grad_norm": 6.439655780792236,
14
- "learning_rate": 1.6666666666666667e-05,
15
- "loss": 2.3256,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.06666666666666667,
20
- "grad_norm": 6.954097270965576,
21
- "learning_rate": 3.3333333333333335e-05,
22
- "loss": 2.2799,
 
 
 
 
 
 
 
 
 
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.1,
27
- "grad_norm": 7.330563068389893,
28
- "learning_rate": 5e-05,
29
- "loss": 2.2089,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.12666666666666668,
34
- "eval_accuracy": 0.5571428571428572,
35
- "eval_loss": 1.9672648906707764,
36
- "eval_runtime": 8.9839,
37
- "eval_samples_per_second": 7.792,
38
- "eval_steps_per_second": 1.002,
39
  "step": 38
40
  },
41
  {
42
- "epoch": 1.0066666666666666,
43
- "grad_norm": 8.947251319885254,
44
- "learning_rate": 4.814814814814815e-05,
45
- "loss": 2.1083,
46
  "step": 40
47
  },
48
  {
49
- "epoch": 1.04,
50
- "grad_norm": 10.695717811584473,
51
- "learning_rate": 4.62962962962963e-05,
52
- "loss": 1.6875,
53
  "step": 50
54
  },
55
  {
56
- "epoch": 1.0733333333333333,
57
- "grad_norm": 10.43160343170166,
58
- "learning_rate": 4.4444444444444447e-05,
59
- "loss": 1.4401,
 
 
 
 
 
 
 
 
 
60
  "step": 60
61
  },
62
  {
63
- "epoch": 1.1066666666666667,
64
- "grad_norm": 7.0906219482421875,
65
- "learning_rate": 4.259259259259259e-05,
66
- "loss": 0.9255,
67
  "step": 70
68
  },
69
  {
70
- "epoch": 1.1266666666666667,
71
- "eval_accuracy": 0.7142857142857143,
72
- "eval_loss": 0.9603942036628723,
73
- "eval_runtime": 7.6642,
74
- "eval_samples_per_second": 9.133,
75
- "eval_steps_per_second": 1.174,
76
  "step": 76
77
  },
78
  {
79
- "epoch": 2.013333333333333,
80
- "grad_norm": 7.673371315002441,
81
- "learning_rate": 4.074074074074074e-05,
82
- "loss": 0.905,
83
  "step": 80
84
  },
85
  {
86
- "epoch": 2.046666666666667,
87
- "grad_norm": 6.490078926086426,
88
- "learning_rate": 3.888888888888889e-05,
89
- "loss": 0.6113,
90
  "step": 90
91
  },
92
  {
93
- "epoch": 2.08,
94
- "grad_norm": 13.964165687561035,
95
- "learning_rate": 3.7037037037037037e-05,
96
- "loss": 0.5379,
 
 
 
 
 
 
 
 
 
97
  "step": 100
98
  },
99
  {
100
- "epoch": 2.1133333333333333,
101
- "grad_norm": 5.957258224487305,
102
- "learning_rate": 3.518518518518519e-05,
103
- "loss": 0.4511,
104
  "step": 110
105
  },
106
  {
107
- "epoch": 2.1266666666666665,
108
- "eval_accuracy": 0.7428571428571429,
109
- "eval_loss": 0.5807345509529114,
110
- "eval_runtime": 8.3663,
111
- "eval_samples_per_second": 8.367,
112
- "eval_steps_per_second": 1.076,
113
  "step": 114
114
  },
115
  {
116
- "epoch": 3.02,
117
- "grad_norm": 4.369177341461182,
118
- "learning_rate": 3.3333333333333335e-05,
119
- "loss": 0.2852,
120
  "step": 120
121
  },
122
  {
123
- "epoch": 3.0533333333333332,
124
- "grad_norm": 2.6237568855285645,
125
- "learning_rate": 3.148148148148148e-05,
126
- "loss": 0.2286,
127
  "step": 130
128
  },
129
  {
130
- "epoch": 3.086666666666667,
131
- "grad_norm": 5.644320964813232,
132
- "learning_rate": 2.962962962962963e-05,
133
- "loss": 0.2827,
134
- "step": 140
135
- },
136
- {
137
- "epoch": 3.12,
138
- "grad_norm": 11.105957984924316,
139
- "learning_rate": 2.777777777777778e-05,
140
- "loss": 0.2102,
141
- "step": 150
142
- },
143
- {
144
- "epoch": 3.1266666666666665,
145
- "eval_accuracy": 0.8857142857142857,
146
- "eval_loss": 0.33364787697792053,
147
- "eval_runtime": 6.7638,
148
- "eval_samples_per_second": 10.349,
149
- "eval_steps_per_second": 1.331,
150
- "step": 152
151
- },
152
- {
153
- "epoch": 4.026666666666666,
154
- "grad_norm": 3.0925567150115967,
155
- "learning_rate": 2.5925925925925925e-05,
156
- "loss": 0.3526,
157
- "step": 160
158
- },
159
- {
160
- "epoch": 4.06,
161
- "grad_norm": 2.3389172554016113,
162
- "learning_rate": 2.4074074074074074e-05,
163
- "loss": 0.3044,
164
- "step": 170
165
- },
166
- {
167
- "epoch": 4.093333333333334,
168
- "grad_norm": 15.626021385192871,
169
- "learning_rate": 2.2222222222222223e-05,
170
- "loss": 0.1253,
171
- "step": 180
172
- },
173
- {
174
- "epoch": 4.126666666666667,
175
- "grad_norm": 26.906152725219727,
176
- "learning_rate": 2.037037037037037e-05,
177
- "loss": 0.0994,
178
- "step": 190
179
- },
180
- {
181
- "epoch": 4.126666666666667,
182
- "eval_accuracy": 0.9571428571428572,
183
- "eval_loss": 0.13536687195301056,
184
- "eval_runtime": 7.4727,
185
- "eval_samples_per_second": 9.367,
186
- "eval_steps_per_second": 1.204,
187
- "step": 190
188
- },
189
- {
190
- "epoch": 5.033333333333333,
191
- "grad_norm": 0.45196783542633057,
192
- "learning_rate": 1.8518518518518518e-05,
193
- "loss": 0.1249,
194
- "step": 200
195
- },
196
- {
197
- "epoch": 5.066666666666666,
198
- "grad_norm": 1.2230411767959595,
199
- "learning_rate": 1.6666666666666667e-05,
200
- "loss": 0.0302,
201
- "step": 210
202
- },
203
- {
204
- "epoch": 5.1,
205
- "grad_norm": 0.2508547604084015,
206
- "learning_rate": 1.4814814814814815e-05,
207
- "loss": 0.1416,
208
- "step": 220
209
- },
210
- {
211
- "epoch": 5.126666666666667,
212
  "eval_accuracy": 0.9,
213
- "eval_loss": 0.2323966920375824,
214
- "eval_runtime": 7.8724,
215
- "eval_samples_per_second": 8.892,
216
- "eval_steps_per_second": 1.143,
217
- "step": 228
218
- },
219
- {
220
- "epoch": 6.006666666666667,
221
- "grad_norm": 0.1509472280740738,
222
- "learning_rate": 1.2962962962962962e-05,
223
- "loss": 0.056,
224
- "step": 230
225
  },
226
  {
227
- "epoch": 6.04,
228
- "grad_norm": 17.42368507385254,
229
- "learning_rate": 1.1111111111111112e-05,
230
- "loss": 0.1302,
231
- "step": 240
232
- },
233
- {
234
- "epoch": 6.073333333333333,
235
- "grad_norm": 0.16968391835689545,
236
- "learning_rate": 9.259259259259259e-06,
237
- "loss": 0.0488,
238
- "step": 250
239
- },
240
- {
241
- "epoch": 6.1066666666666665,
242
- "grad_norm": 0.1539120376110077,
243
- "learning_rate": 7.4074074074074075e-06,
244
- "loss": 0.0301,
245
- "step": 260
246
- },
247
- {
248
- "epoch": 6.126666666666667,
249
- "eval_accuracy": 0.9428571428571428,
250
- "eval_loss": 0.17422690987586975,
251
- "eval_runtime": 8.163,
252
- "eval_samples_per_second": 8.575,
253
- "eval_steps_per_second": 1.103,
254
- "step": 266
255
- },
256
- {
257
- "epoch": 7.013333333333334,
258
- "grad_norm": 0.12634336948394775,
259
- "learning_rate": 5.555555555555556e-06,
260
- "loss": 0.0144,
261
- "step": 270
262
- },
263
- {
264
- "epoch": 7.046666666666667,
265
- "grad_norm": 0.1565423160791397,
266
- "learning_rate": 3.7037037037037037e-06,
267
- "loss": 0.0141,
268
- "step": 280
269
- },
270
- {
271
- "epoch": 7.08,
272
- "grad_norm": 0.09596653282642365,
273
- "learning_rate": 1.8518518518518519e-06,
274
- "loss": 0.0144,
275
- "step": 290
276
- },
277
- {
278
- "epoch": 7.113333333333333,
279
- "grad_norm": 0.09075827151536942,
280
- "learning_rate": 0.0,
281
- "loss": 0.023,
282
- "step": 300
283
- },
284
- {
285
- "epoch": 7.113333333333333,
286
- "eval_accuracy": 0.9714285714285714,
287
- "eval_loss": 0.0840597078204155,
288
- "eval_runtime": 7.7829,
289
- "eval_samples_per_second": 8.994,
290
- "eval_steps_per_second": 1.156,
291
- "step": 300
292
- },
293
- {
294
- "epoch": 7.113333333333333,
295
- "step": 300,
296
- "total_flos": 2.955876078527447e+18,
297
- "train_loss": 0.5999104872345924,
298
- "train_runtime": 483.0932,
299
- "train_samples_per_second": 4.968,
300
- "train_steps_per_second": 0.621
301
- },
302
- {
303
- "epoch": 7.113333333333333,
304
- "eval_accuracy": 0.9548387096774194,
305
- "eval_loss": 0.15970781445503235,
306
- "eval_runtime": 18.7889,
307
- "eval_samples_per_second": 8.25,
308
- "eval_steps_per_second": 1.064,
309
- "step": 300
310
  },
311
  {
312
- "epoch": 7.113333333333333,
313
- "eval_accuracy": 0.9548387096774194,
314
- "eval_loss": 0.1593276411294937,
315
- "eval_runtime": 16.3684,
316
- "eval_samples_per_second": 9.469,
317
- "eval_steps_per_second": 1.222,
318
- "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  }
320
  ],
321
  "logging_steps": 10,
322
- "max_steps": 300,
323
  "num_input_tokens_seen": 0,
324
  "num_train_epochs": 9223372036854775807,
325
  "save_steps": 500,
@@ -335,8 +223,8 @@
335
  "attributes": {}
336
  }
337
  },
338
- "total_flos": 2.955876078527447e+18,
339
- "train_batch_size": 8,
340
  "trial_name": null,
341
  "trial_params": null
342
  }
 
1
  {
2
+ "best_metric": 0.9142857142857143,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-148",
4
+ "epoch": 7.101351351351352,
5
  "eval_steps": 500,
6
+ "global_step": 148,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.06756756756756757,
13
+ "grad_norm": 6.34252405166626,
14
+ "learning_rate": 3.3333333333333335e-05,
15
+ "loss": 2.0466,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.12837837837837837,
20
+ "eval_accuracy": 0.6142857142857143,
21
+ "eval_loss": 1.6348506212234497,
22
+ "eval_runtime": 6.1663,
23
+ "eval_samples_per_second": 11.352,
24
+ "eval_steps_per_second": 0.811,
25
+ "step": 19
26
+ },
27
+ {
28
+ "epoch": 1.0067567567567568,
29
+ "grad_norm": 7.780186176300049,
30
+ "learning_rate": 4.81203007518797e-05,
31
+ "loss": 1.8425,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 1.0743243243243243,
36
+ "grad_norm": 7.907790184020996,
37
+ "learning_rate": 4.43609022556391e-05,
38
+ "loss": 1.348,
39
  "step": 30
40
  },
41
  {
42
+ "epoch": 1.1283783783783783,
43
+ "eval_accuracy": 0.8428571428571429,
44
+ "eval_loss": 0.804053783416748,
45
+ "eval_runtime": 6.6922,
46
+ "eval_samples_per_second": 10.46,
47
+ "eval_steps_per_second": 0.747,
48
  "step": 38
49
  },
50
  {
51
+ "epoch": 2.0135135135135136,
52
+ "grad_norm": 5.862756729125977,
53
+ "learning_rate": 4.0601503759398494e-05,
54
+ "loss": 0.8369,
55
  "step": 40
56
  },
57
  {
58
+ "epoch": 2.081081081081081,
59
+ "grad_norm": 5.44201135635376,
60
+ "learning_rate": 3.6842105263157895e-05,
61
+ "loss": 0.6208,
62
  "step": 50
63
  },
64
  {
65
+ "epoch": 2.1283783783783785,
66
+ "eval_accuracy": 0.7285714285714285,
67
+ "eval_loss": 0.7582912445068359,
68
+ "eval_runtime": 6.1646,
69
+ "eval_samples_per_second": 11.355,
70
+ "eval_steps_per_second": 0.811,
71
+ "step": 57
72
+ },
73
+ {
74
+ "epoch": 3.02027027027027,
75
+ "grad_norm": 12.206607818603516,
76
+ "learning_rate": 3.3082706766917295e-05,
77
+ "loss": 0.5199,
78
  "step": 60
79
  },
80
  {
81
+ "epoch": 3.0878378378378377,
82
+ "grad_norm": 5.501271724700928,
83
+ "learning_rate": 2.9323308270676693e-05,
84
+ "loss": 0.332,
85
  "step": 70
86
  },
87
  {
88
+ "epoch": 3.1283783783783785,
89
+ "eval_accuracy": 0.8285714285714286,
90
+ "eval_loss": 0.45565441250801086,
91
+ "eval_runtime": 6.0418,
92
+ "eval_samples_per_second": 11.586,
93
+ "eval_steps_per_second": 0.828,
94
  "step": 76
95
  },
96
  {
97
+ "epoch": 4.027027027027027,
98
+ "grad_norm": 7.0609965324401855,
99
+ "learning_rate": 2.556390977443609e-05,
100
+ "loss": 0.2761,
101
  "step": 80
102
  },
103
  {
104
+ "epoch": 4.094594594594595,
105
+ "grad_norm": 3.5807061195373535,
106
+ "learning_rate": 2.1804511278195487e-05,
107
+ "loss": 0.2229,
108
  "step": 90
109
  },
110
  {
111
+ "epoch": 4.128378378378378,
112
+ "eval_accuracy": 0.8857142857142857,
113
+ "eval_loss": 0.3132798373699188,
114
+ "eval_runtime": 6.754,
115
+ "eval_samples_per_second": 10.364,
116
+ "eval_steps_per_second": 0.74,
117
+ "step": 95
118
+ },
119
+ {
120
+ "epoch": 5.033783783783784,
121
+ "grad_norm": 1.8226597309112549,
122
+ "learning_rate": 1.8045112781954888e-05,
123
+ "loss": 0.2292,
124
  "step": 100
125
  },
126
  {
127
+ "epoch": 5.101351351351352,
128
+ "grad_norm": 6.401218414306641,
129
+ "learning_rate": 1.4285714285714285e-05,
130
+ "loss": 0.1479,
131
  "step": 110
132
  },
133
  {
134
+ "epoch": 5.128378378378378,
135
+ "eval_accuracy": 0.9,
136
+ "eval_loss": 0.2872016131877899,
137
+ "eval_runtime": 5.5369,
138
+ "eval_samples_per_second": 12.643,
139
+ "eval_steps_per_second": 0.903,
140
  "step": 114
141
  },
142
  {
143
+ "epoch": 6.04054054054054,
144
+ "grad_norm": 4.436432361602783,
145
+ "learning_rate": 1.0526315789473684e-05,
146
+ "loss": 0.0848,
147
  "step": 120
148
  },
149
  {
150
+ "epoch": 6.108108108108108,
151
+ "grad_norm": 1.9256197214126587,
152
+ "learning_rate": 6.766917293233083e-06,
153
+ "loss": 0.0761,
154
  "step": 130
155
  },
156
  {
157
+ "epoch": 6.128378378378378,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  "eval_accuracy": 0.9,
159
+ "eval_loss": 0.28876611590385437,
160
+ "eval_runtime": 6.7507,
161
+ "eval_samples_per_second": 10.369,
162
+ "eval_steps_per_second": 0.741,
163
+ "step": 133
 
 
 
 
 
 
 
164
  },
165
  {
166
+ "epoch": 7.047297297297297,
167
+ "grad_norm": 7.098940372467041,
168
+ "learning_rate": 3.007518796992481e-06,
169
+ "loss": 0.0696,
170
+ "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  },
172
  {
173
+ "epoch": 7.101351351351352,
174
+ "eval_accuracy": 0.9142857142857143,
175
+ "eval_loss": 0.26643699407577515,
176
+ "eval_runtime": 6.7738,
177
+ "eval_samples_per_second": 10.334,
178
+ "eval_steps_per_second": 0.738,
179
+ "step": 148
180
+ },
181
+ {
182
+ "epoch": 7.101351351351352,
183
+ "step": 148,
184
+ "total_flos": 2.915999166844109e+18,
185
+ "train_loss": 0.5872005439690642,
186
+ "train_runtime": 676.4093,
187
+ "train_samples_per_second": 3.501,
188
+ "train_steps_per_second": 0.219
189
+ },
190
+ {
191
+ "epoch": 7.101351351351352,
192
+ "eval_accuracy": 0.9290322580645162,
193
+ "eval_loss": 0.2252088487148285,
194
+ "eval_runtime": 13.824,
195
+ "eval_samples_per_second": 11.212,
196
+ "eval_steps_per_second": 0.723,
197
+ "step": 148
198
+ },
199
+ {
200
+ "epoch": 7.101351351351352,
201
+ "eval_accuracy": 0.9290322580645162,
202
+ "eval_loss": 0.2266511768102646,
203
+ "eval_runtime": 13.6046,
204
+ "eval_samples_per_second": 11.393,
205
+ "eval_steps_per_second": 0.735,
206
+ "step": 148
207
  }
208
  ],
209
  "logging_steps": 10,
210
+ "max_steps": 148,
211
  "num_input_tokens_seen": 0,
212
  "num_train_epochs": 9223372036854775807,
213
  "save_steps": 500,
 
223
  "attributes": {}
224
  }
225
  },
226
+ "total_flos": 2.915999166844109e+18,
227
+ "train_batch_size": 16,
228
  "trial_name": null,
229
  "trial_params": null
230
  }