Melo1512 committed on
Commit
cab9489
·
verified ·
1 Parent(s): 9a0df9b

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8732394366197183
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [Melo1512/vit-msn-small-beta-fia-manually-enhanced-HSV_test_2](https://huggingface.co/Melo1512/vit-msn-small-beta-fia-manually-enhanced-HSV_test_2) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.5343
36
- - Accuracy: 0.8732
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8802816901408451
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [Melo1512/vit-msn-small-beta-fia-manually-enhanced-HSV_test_2](https://huggingface.co/Melo1512/vit-msn-small-beta-fia-manually-enhanced-HSV_test_2) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5013
36
+ - Accuracy: 0.8803
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 28.571428571428573,
3
- "eval_accuracy": 0.8873239436619719,
4
- "eval_loss": 0.5122641324996948,
5
- "eval_runtime": 0.6466,
6
- "eval_samples_per_second": 219.608,
7
- "eval_steps_per_second": 4.64,
8
  "total_flos": 2.3842598606630093e+17,
9
- "train_loss": 0.4234132957458496,
10
- "train_runtime": 127.7102,
11
- "train_samples_per_second": 166.784,
12
- "train_steps_per_second": 0.392
13
  }
 
1
  {
2
  "epoch": 28.571428571428573,
3
+ "eval_accuracy": 0.8802816901408451,
4
+ "eval_loss": 0.5013329386711121,
5
+ "eval_runtime": 0.6221,
6
+ "eval_samples_per_second": 228.251,
7
+ "eval_steps_per_second": 4.822,
8
  "total_flos": 2.3842598606630093e+17,
9
+ "train_loss": 0.4329641246795654,
10
+ "train_runtime": 127.8111,
11
+ "train_samples_per_second": 166.652,
12
+ "train_steps_per_second": 0.391
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 28.571428571428573,
3
- "eval_accuracy": 0.8873239436619719,
4
- "eval_loss": 0.5122641324996948,
5
- "eval_runtime": 0.6466,
6
- "eval_samples_per_second": 219.608,
7
- "eval_steps_per_second": 4.64
8
  }
 
1
  {
2
  "epoch": 28.571428571428573,
3
+ "eval_accuracy": 0.8802816901408451,
4
+ "eval_loss": 0.5013329386711121,
5
+ "eval_runtime": 0.6221,
6
+ "eval_samples_per_second": 228.251,
7
+ "eval_steps_per_second": 4.822
8
  }
runs/Jan27_17-20-31_db1093ce036b/events.out.tfevents.1737998729.db1093ce036b.224.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8332d9f0d9802a77fa0c303af28b8ac86cc7cf33a4549c2cad27e24b29a453de
3
+ size 405
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 28.571428571428573,
3
  "total_flos": 2.3842598606630093e+17,
4
- "train_loss": 0.4234132957458496,
5
- "train_runtime": 127.7102,
6
- "train_samples_per_second": 166.784,
7
- "train_steps_per_second": 0.392
8
  }
 
1
  {
2
  "epoch": 28.571428571428573,
3
  "total_flos": 2.3842598606630093e+17,
4
+ "train_loss": 0.4329641246795654,
5
+ "train_runtime": 127.8111,
6
+ "train_samples_per_second": 166.652,
7
+ "train_steps_per_second": 0.391
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.5122641324996948,
3
- "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-1",
4
  "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
  "global_step": 50,
@@ -12,306 +12,306 @@
12
  "epoch": 0.5714285714285714,
13
  "eval_accuracy": 0.8873239436619719,
14
  "eval_loss": 0.5122641324996948,
15
- "eval_runtime": 0.5861,
16
- "eval_samples_per_second": 242.264,
17
- "eval_steps_per_second": 5.118,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 1.7142857142857144,
22
- "eval_accuracy": 0.8661971830985915,
23
- "eval_loss": 0.5494502186775208,
24
- "eval_runtime": 0.5821,
25
- "eval_samples_per_second": 243.962,
26
- "eval_steps_per_second": 5.154,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 2.857142857142857,
31
- "eval_accuracy": 0.8591549295774648,
32
- "eval_loss": 0.6079620122909546,
33
- "eval_runtime": 0.5831,
34
- "eval_samples_per_second": 243.534,
35
  "eval_steps_per_second": 5.145,
36
  "step": 5
37
  },
38
  {
39
  "epoch": 4.0,
40
  "eval_accuracy": 0.8732394366197183,
41
- "eval_loss": 0.5590734481811523,
42
- "eval_runtime": 0.6248,
43
- "eval_samples_per_second": 227.269,
44
- "eval_steps_per_second": 4.801,
45
  "step": 7
46
  },
47
  {
48
  "epoch": 4.571428571428571,
49
- "eval_accuracy": 0.8732394366197183,
50
- "eval_loss": 0.5464029312133789,
51
- "eval_runtime": 0.6237,
52
- "eval_samples_per_second": 227.68,
53
- "eval_steps_per_second": 4.81,
54
  "step": 8
55
  },
56
  {
57
  "epoch": 5.714285714285714,
58
- "grad_norm": 4.793102264404297,
59
  "learning_rate": 9.523809523809525e-06,
60
- "loss": 0.4241,
61
  "step": 10
62
  },
63
  {
64
  "epoch": 5.714285714285714,
65
- "eval_accuracy": 0.8450704225352113,
66
- "eval_loss": 0.5981650352478027,
67
- "eval_runtime": 0.6008,
68
- "eval_samples_per_second": 236.347,
69
- "eval_steps_per_second": 4.993,
70
  "step": 10
71
  },
72
  {
73
  "epoch": 6.857142857142857,
74
- "eval_accuracy": 0.8169014084507042,
75
- "eval_loss": 0.6497244238853455,
76
- "eval_runtime": 0.6066,
77
- "eval_samples_per_second": 234.109,
78
- "eval_steps_per_second": 4.946,
79
  "step": 12
80
  },
81
  {
82
  "epoch": 8.0,
83
- "eval_accuracy": 0.852112676056338,
84
- "eval_loss": 0.5927726030349731,
85
- "eval_runtime": 0.627,
86
- "eval_samples_per_second": 226.478,
87
- "eval_steps_per_second": 4.785,
88
  "step": 14
89
  },
90
  {
91
  "epoch": 8.571428571428571,
92
- "eval_accuracy": 0.852112676056338,
93
- "eval_loss": 0.5711137056350708,
94
- "eval_runtime": 0.6061,
95
- "eval_samples_per_second": 234.301,
96
- "eval_steps_per_second": 4.95,
97
  "step": 15
98
  },
99
  {
100
  "epoch": 9.714285714285714,
101
- "eval_accuracy": 0.8732394366197183,
102
- "eval_loss": 0.5468315482139587,
103
- "eval_runtime": 0.5742,
104
- "eval_samples_per_second": 247.304,
105
- "eval_steps_per_second": 5.225,
106
  "step": 17
107
  },
108
  {
109
  "epoch": 10.857142857142858,
110
- "eval_accuracy": 0.852112676056338,
111
- "eval_loss": 0.5482771992683411,
112
- "eval_runtime": 0.5841,
113
- "eval_samples_per_second": 243.112,
114
- "eval_steps_per_second": 5.136,
115
  "step": 19
116
  },
117
  {
118
  "epoch": 11.428571428571429,
119
- "grad_norm": 5.585962772369385,
120
  "learning_rate": 7.1428571428571436e-06,
121
- "loss": 0.4152,
122
  "step": 20
123
  },
124
  {
125
  "epoch": 12.0,
126
- "eval_accuracy": 0.8450704225352113,
127
- "eval_loss": 0.5783097147941589,
128
- "eval_runtime": 0.5721,
129
- "eval_samples_per_second": 248.198,
130
- "eval_steps_per_second": 5.244,
131
  "step": 21
132
  },
133
  {
134
  "epoch": 12.571428571428571,
135
- "eval_accuracy": 0.8450704225352113,
136
- "eval_loss": 0.5835375785827637,
137
- "eval_runtime": 0.6296,
138
- "eval_samples_per_second": 225.543,
139
- "eval_steps_per_second": 4.765,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 13.714285714285714,
144
- "eval_accuracy": 0.8450704225352113,
145
- "eval_loss": 0.5668258666992188,
146
- "eval_runtime": 0.5869,
147
- "eval_samples_per_second": 241.934,
148
- "eval_steps_per_second": 5.111,
149
  "step": 24
150
  },
151
  {
152
  "epoch": 14.857142857142858,
153
- "eval_accuracy": 0.8450704225352113,
154
- "eval_loss": 0.555620014667511,
155
- "eval_runtime": 0.5858,
156
- "eval_samples_per_second": 242.413,
157
- "eval_steps_per_second": 5.121,
158
  "step": 26
159
  },
160
  {
161
  "epoch": 16.0,
162
- "eval_accuracy": 0.8450704225352113,
163
- "eval_loss": 0.5564189553260803,
164
- "eval_runtime": 0.5987,
165
- "eval_samples_per_second": 237.192,
166
- "eval_steps_per_second": 5.011,
167
  "step": 28
168
  },
169
  {
170
  "epoch": 16.571428571428573,
171
- "eval_accuracy": 0.8450704225352113,
172
- "eval_loss": 0.5590547919273376,
173
- "eval_runtime": 0.6483,
174
- "eval_samples_per_second": 219.036,
175
- "eval_steps_per_second": 4.628,
176
  "step": 29
177
  },
178
  {
179
  "epoch": 17.142857142857142,
180
- "grad_norm": 5.258753299713135,
181
  "learning_rate": 4.761904761904762e-06,
182
- "loss": 0.4367,
183
  "step": 30
184
  },
185
  {
186
  "epoch": 17.714285714285715,
187
  "eval_accuracy": 0.8591549295774648,
188
- "eval_loss": 0.5619198679924011,
189
- "eval_runtime": 0.6281,
190
- "eval_samples_per_second": 226.062,
191
- "eval_steps_per_second": 4.776,
192
  "step": 31
193
  },
194
  {
195
  "epoch": 18.857142857142858,
196
  "eval_accuracy": 0.8591549295774648,
197
- "eval_loss": 0.5809253454208374,
198
- "eval_runtime": 0.5953,
199
- "eval_samples_per_second": 238.516,
200
- "eval_steps_per_second": 5.039,
201
  "step": 33
202
  },
203
  {
204
  "epoch": 20.0,
205
- "eval_accuracy": 0.8661971830985915,
206
- "eval_loss": 0.5810067057609558,
207
- "eval_runtime": 0.626,
208
- "eval_samples_per_second": 226.835,
209
- "eval_steps_per_second": 4.792,
210
  "step": 35
211
  },
212
  {
213
  "epoch": 20.571428571428573,
214
  "eval_accuracy": 0.8661971830985915,
215
- "eval_loss": 0.5768489837646484,
216
- "eval_runtime": 0.682,
217
- "eval_samples_per_second": 208.199,
218
- "eval_steps_per_second": 4.399,
219
  "step": 36
220
  },
221
  {
222
  "epoch": 21.714285714285715,
223
- "eval_accuracy": 0.8732394366197183,
224
- "eval_loss": 0.5590782761573792,
225
- "eval_runtime": 0.665,
226
- "eval_samples_per_second": 213.54,
227
- "eval_steps_per_second": 4.511,
228
  "step": 38
229
  },
230
  {
231
  "epoch": 22.857142857142858,
232
- "grad_norm": 4.620666027069092,
233
  "learning_rate": 2.380952380952381e-06,
234
- "loss": 0.4241,
235
  "step": 40
236
  },
237
  {
238
  "epoch": 22.857142857142858,
239
- "eval_accuracy": 0.8732394366197183,
240
- "eval_loss": 0.5452097654342651,
241
- "eval_runtime": 0.5858,
242
- "eval_samples_per_second": 242.391,
243
- "eval_steps_per_second": 5.121,
244
  "step": 40
245
  },
246
  {
247
  "epoch": 24.0,
248
- "eval_accuracy": 0.8732394366197183,
249
- "eval_loss": 0.5387392640113831,
250
- "eval_runtime": 0.5833,
251
- "eval_samples_per_second": 243.447,
252
- "eval_steps_per_second": 5.143,
253
  "step": 42
254
  },
255
  {
256
  "epoch": 24.571428571428573,
257
- "eval_accuracy": 0.8732394366197183,
258
- "eval_loss": 0.5397770404815674,
259
- "eval_runtime": 0.6666,
260
- "eval_samples_per_second": 213.023,
261
- "eval_steps_per_second": 4.5,
262
  "step": 43
263
  },
264
  {
265
  "epoch": 25.714285714285715,
266
- "eval_accuracy": 0.8732394366197183,
267
- "eval_loss": 0.5457538962364197,
268
- "eval_runtime": 0.5797,
269
- "eval_samples_per_second": 244.962,
270
- "eval_steps_per_second": 5.175,
271
  "step": 45
272
  },
273
  {
274
  "epoch": 26.857142857142858,
275
  "eval_accuracy": 0.8732394366197183,
276
- "eval_loss": 0.5509300827980042,
277
- "eval_runtime": 0.6395,
278
- "eval_samples_per_second": 222.035,
279
- "eval_steps_per_second": 4.691,
280
  "step": 47
281
  },
282
  {
283
  "epoch": 28.0,
284
  "eval_accuracy": 0.8732394366197183,
285
- "eval_loss": 0.5549753904342651,
286
- "eval_runtime": 0.5878,
287
- "eval_samples_per_second": 241.566,
288
- "eval_steps_per_second": 5.103,
289
  "step": 49
290
  },
291
  {
292
  "epoch": 28.571428571428573,
293
- "grad_norm": 4.701329708099365,
294
  "learning_rate": 0.0,
295
- "loss": 0.4171,
296
  "step": 50
297
  },
298
  {
299
  "epoch": 28.571428571428573,
300
  "eval_accuracy": 0.8732394366197183,
301
- "eval_loss": 0.5557973980903625,
302
- "eval_runtime": 0.5942,
303
- "eval_samples_per_second": 238.984,
304
- "eval_steps_per_second": 5.049,
305
  "step": 50
306
  },
307
  {
308
  "epoch": 28.571428571428573,
309
  "step": 50,
310
  "total_flos": 2.3842598606630093e+17,
311
- "train_loss": 0.4234132957458496,
312
- "train_runtime": 127.7102,
313
- "train_samples_per_second": 166.784,
314
- "train_steps_per_second": 0.392
315
  }
316
  ],
317
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.5013329386711121,
3
+ "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-17",
4
  "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
  "global_step": 50,
 
12
  "epoch": 0.5714285714285714,
13
  "eval_accuracy": 0.8873239436619719,
14
  "eval_loss": 0.5122641324996948,
15
+ "eval_runtime": 0.6602,
16
+ "eval_samples_per_second": 215.093,
17
+ "eval_steps_per_second": 4.544,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 1.7142857142857144,
22
+ "eval_accuracy": 0.8873239436619719,
23
+ "eval_loss": 0.521920382976532,
24
+ "eval_runtime": 0.5873,
25
+ "eval_samples_per_second": 241.779,
26
+ "eval_steps_per_second": 5.108,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 2.857142857142857,
31
+ "eval_accuracy": 0.8732394366197183,
32
+ "eval_loss": 0.5431370139122009,
33
+ "eval_runtime": 0.583,
34
+ "eval_samples_per_second": 243.549,
35
  "eval_steps_per_second": 5.145,
36
  "step": 5
37
  },
38
  {
39
  "epoch": 4.0,
40
  "eval_accuracy": 0.8732394366197183,
41
+ "eval_loss": 0.5444017648696899,
42
+ "eval_runtime": 0.6551,
43
+ "eval_samples_per_second": 216.765,
44
+ "eval_steps_per_second": 4.58,
45
  "step": 7
46
  },
47
  {
48
  "epoch": 4.571428571428571,
49
+ "eval_accuracy": 0.8802816901408451,
50
+ "eval_loss": 0.5335883498191833,
51
+ "eval_runtime": 0.6183,
52
+ "eval_samples_per_second": 229.646,
53
+ "eval_steps_per_second": 4.852,
54
  "step": 8
55
  },
56
  {
57
  "epoch": 5.714285714285714,
58
+ "grad_norm": 4.845490455627441,
59
  "learning_rate": 9.523809523809525e-06,
60
+ "loss": 0.4252,
61
  "step": 10
62
  },
63
  {
64
  "epoch": 5.714285714285714,
65
+ "eval_accuracy": 0.8873239436619719,
66
+ "eval_loss": 0.5234636068344116,
67
+ "eval_runtime": 0.6093,
68
+ "eval_samples_per_second": 233.044,
69
+ "eval_steps_per_second": 4.923,
70
  "step": 10
71
  },
72
  {
73
  "epoch": 6.857142857142857,
74
+ "eval_accuracy": 0.8802816901408451,
75
+ "eval_loss": 0.5268605947494507,
76
+ "eval_runtime": 0.5951,
77
+ "eval_samples_per_second": 238.608,
78
+ "eval_steps_per_second": 5.041,
79
  "step": 12
80
  },
81
  {
82
  "epoch": 8.0,
83
+ "eval_accuracy": 0.8873239436619719,
84
+ "eval_loss": 0.5106273889541626,
85
+ "eval_runtime": 0.576,
86
+ "eval_samples_per_second": 246.532,
87
+ "eval_steps_per_second": 5.208,
88
  "step": 14
89
  },
90
  {
91
  "epoch": 8.571428571428571,
92
+ "eval_accuracy": 0.8873239436619719,
93
+ "eval_loss": 0.5047743916511536,
94
+ "eval_runtime": 0.6164,
95
+ "eval_samples_per_second": 230.361,
96
+ "eval_steps_per_second": 4.867,
97
  "step": 15
98
  },
99
  {
100
  "epoch": 9.714285714285714,
101
+ "eval_accuracy": 0.8802816901408451,
102
+ "eval_loss": 0.5013329386711121,
103
+ "eval_runtime": 0.5967,
104
+ "eval_samples_per_second": 237.985,
105
+ "eval_steps_per_second": 5.028,
106
  "step": 17
107
  },
108
  {
109
  "epoch": 10.857142857142858,
110
+ "eval_accuracy": 0.8802816901408451,
111
+ "eval_loss": 0.5104676485061646,
112
+ "eval_runtime": 0.6082,
113
+ "eval_samples_per_second": 233.487,
114
+ "eval_steps_per_second": 4.933,
115
  "step": 19
116
  },
117
  {
118
  "epoch": 11.428571428571429,
119
+ "grad_norm": 4.298947811126709,
120
  "learning_rate": 7.1428571428571436e-06,
121
+ "loss": 0.4413,
122
  "step": 20
123
  },
124
  {
125
  "epoch": 12.0,
126
+ "eval_accuracy": 0.8802816901408451,
127
+ "eval_loss": 0.525558352470398,
128
+ "eval_runtime": 0.6369,
129
+ "eval_samples_per_second": 222.939,
130
+ "eval_steps_per_second": 4.71,
131
  "step": 21
132
  },
133
  {
134
  "epoch": 12.571428571428571,
135
+ "eval_accuracy": 0.8732394366197183,
136
+ "eval_loss": 0.5303365588188171,
137
+ "eval_runtime": 0.5782,
138
+ "eval_samples_per_second": 245.582,
139
+ "eval_steps_per_second": 5.188,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 13.714285714285714,
144
+ "eval_accuracy": 0.8661971830985915,
145
+ "eval_loss": 0.5218045115470886,
146
+ "eval_runtime": 0.5767,
147
+ "eval_samples_per_second": 246.238,
148
+ "eval_steps_per_second": 5.202,
149
  "step": 24
150
  },
151
  {
152
  "epoch": 14.857142857142858,
153
+ "eval_accuracy": 0.8591549295774648,
154
+ "eval_loss": 0.5187973380088806,
155
+ "eval_runtime": 0.5779,
156
+ "eval_samples_per_second": 245.713,
157
+ "eval_steps_per_second": 5.191,
158
  "step": 26
159
  },
160
  {
161
  "epoch": 16.0,
162
+ "eval_accuracy": 0.8591549295774648,
163
+ "eval_loss": 0.5202071666717529,
164
+ "eval_runtime": 0.5943,
165
+ "eval_samples_per_second": 238.943,
166
+ "eval_steps_per_second": 5.048,
167
  "step": 28
168
  },
169
  {
170
  "epoch": 16.571428571428573,
171
+ "eval_accuracy": 0.8591549295774648,
172
+ "eval_loss": 0.5252342820167542,
173
+ "eval_runtime": 0.5791,
174
+ "eval_samples_per_second": 245.212,
175
+ "eval_steps_per_second": 5.181,
176
  "step": 29
177
  },
178
  {
179
  "epoch": 17.142857142857142,
180
+ "grad_norm": 4.189113140106201,
181
  "learning_rate": 4.761904761904762e-06,
182
+ "loss": 0.437,
183
  "step": 30
184
  },
185
  {
186
  "epoch": 17.714285714285715,
187
  "eval_accuracy": 0.8591549295774648,
188
+ "eval_loss": 0.5384504199028015,
189
+ "eval_runtime": 0.6761,
190
+ "eval_samples_per_second": 210.04,
191
+ "eval_steps_per_second": 4.437,
192
  "step": 31
193
  },
194
  {
195
  "epoch": 18.857142857142858,
196
  "eval_accuracy": 0.8591549295774648,
197
+ "eval_loss": 0.5456344485282898,
198
+ "eval_runtime": 0.5817,
199
+ "eval_samples_per_second": 244.116,
200
+ "eval_steps_per_second": 5.157,
201
  "step": 33
202
  },
203
  {
204
  "epoch": 20.0,
205
+ "eval_accuracy": 0.8732394366197183,
206
+ "eval_loss": 0.5408889055252075,
207
+ "eval_runtime": 0.5889,
208
+ "eval_samples_per_second": 241.115,
209
+ "eval_steps_per_second": 5.094,
210
  "step": 35
211
  },
212
  {
213
  "epoch": 20.571428571428573,
214
  "eval_accuracy": 0.8661971830985915,
215
+ "eval_loss": 0.5374722480773926,
216
+ "eval_runtime": 0.6624,
217
+ "eval_samples_per_second": 214.362,
218
+ "eval_steps_per_second": 4.529,
219
  "step": 36
220
  },
221
  {
222
  "epoch": 21.714285714285715,
223
+ "eval_accuracy": 0.8661971830985915,
224
+ "eval_loss": 0.5355854034423828,
225
+ "eval_runtime": 0.6285,
226
+ "eval_samples_per_second": 225.953,
227
+ "eval_steps_per_second": 4.774,
228
  "step": 38
229
  },
230
  {
231
  "epoch": 22.857142857142858,
232
+ "grad_norm": 5.2482404708862305,
233
  "learning_rate": 2.380952380952381e-06,
234
+ "loss": 0.4343,
235
  "step": 40
236
  },
237
  {
238
  "epoch": 22.857142857142858,
239
+ "eval_accuracy": 0.8802816901408451,
240
+ "eval_loss": 0.5328315496444702,
241
+ "eval_runtime": 0.5751,
242
+ "eval_samples_per_second": 246.919,
243
+ "eval_steps_per_second": 5.217,
244
  "step": 40
245
  },
246
  {
247
  "epoch": 24.0,
248
+ "eval_accuracy": 0.8802816901408451,
249
+ "eval_loss": 0.531771719455719,
250
+ "eval_runtime": 0.5972,
251
+ "eval_samples_per_second": 237.757,
252
+ "eval_steps_per_second": 5.023,
253
  "step": 42
254
  },
255
  {
256
  "epoch": 24.571428571428573,
257
+ "eval_accuracy": 0.8802816901408451,
258
+ "eval_loss": 0.5329975485801697,
259
+ "eval_runtime": 0.6133,
260
+ "eval_samples_per_second": 231.529,
261
+ "eval_steps_per_second": 4.891,
262
  "step": 43
263
  },
264
  {
265
  "epoch": 25.714285714285715,
266
+ "eval_accuracy": 0.8802816901408451,
267
+ "eval_loss": 0.5333565473556519,
268
+ "eval_runtime": 0.5803,
269
+ "eval_samples_per_second": 244.697,
270
+ "eval_steps_per_second": 5.17,
271
  "step": 45
272
  },
273
  {
274
  "epoch": 26.857142857142858,
275
  "eval_accuracy": 0.8732394366197183,
276
+ "eval_loss": 0.5332355499267578,
277
+ "eval_runtime": 0.6475,
278
+ "eval_samples_per_second": 219.301,
279
+ "eval_steps_per_second": 4.633,
280
  "step": 47
281
  },
282
  {
283
  "epoch": 28.0,
284
  "eval_accuracy": 0.8732394366197183,
285
+ "eval_loss": 0.5341118574142456,
286
+ "eval_runtime": 0.5963,
287
+ "eval_samples_per_second": 238.119,
288
+ "eval_steps_per_second": 5.031,
289
  "step": 49
290
  },
291
  {
292
  "epoch": 28.571428571428573,
293
+ "grad_norm": 4.288495063781738,
294
  "learning_rate": 0.0,
295
+ "loss": 0.4271,
296
  "step": 50
297
  },
298
  {
299
  "epoch": 28.571428571428573,
300
  "eval_accuracy": 0.8732394366197183,
301
+ "eval_loss": 0.5343306064605713,
302
+ "eval_runtime": 0.6337,
303
+ "eval_samples_per_second": 224.087,
304
+ "eval_steps_per_second": 4.734,
305
  "step": 50
306
  },
307
  {
308
  "epoch": 28.571428571428573,
309
  "step": 50,
310
  "total_flos": 2.3842598606630093e+17,
311
+ "train_loss": 0.4329641246795654,
312
+ "train_runtime": 127.8111,
313
+ "train_samples_per_second": 166.652,
314
+ "train_steps_per_second": 0.391
315
  }
316
  ],
317
  "logging_steps": 10,