Melo1512 commited on
Commit
26e1f78
·
verified ·
1 Parent(s): 6c87a12

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
  "epoch": 28.571428571428573,
 
 
 
 
 
3
  "total_flos": 2.3842598606630093e+17,
4
- "train_loss": 0.4332954502105713,
5
- "train_runtime": 131.7536,
6
- "train_samples_per_second": 161.665,
7
- "train_steps_per_second": 0.379
8
  }
 
1
  {
2
  "epoch": 28.571428571428573,
3
+ "eval_accuracy": 0.8873239436619719,
4
+ "eval_loss": 0.5122641324996948,
5
+ "eval_runtime": 0.6466,
6
+ "eval_samples_per_second": 219.608,
7
+ "eval_steps_per_second": 4.64,
8
  "total_flos": 2.3842598606630093e+17,
9
+ "train_loss": 0.43769028663635257,
10
+ "train_runtime": 134.3919,
11
+ "train_samples_per_second": 158.492,
12
+ "train_steps_per_second": 0.372
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 28.571428571428573,
3
+ "eval_accuracy": 0.8873239436619719,
4
+ "eval_loss": 0.5122641324996948,
5
+ "eval_runtime": 0.6466,
6
+ "eval_samples_per_second": 219.608,
7
+ "eval_steps_per_second": 4.64
8
+ }
runs/Jan27_17-12-41_db1093ce036b/events.out.tfevents.1737998264.db1093ce036b.224.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c986a248b1092422da5b4d1e3d672d571bf03464b7b4d2a807d8924b4097dac
3
+ size 5786
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 28.571428571428573,
3
  "total_flos": 2.3842598606630093e+17,
4
- "train_loss": 0.4332954502105713,
5
- "train_runtime": 131.7536,
6
- "train_samples_per_second": 161.665,
7
- "train_steps_per_second": 0.379
8
  }
 
1
  {
2
  "epoch": 28.571428571428573,
3
  "total_flos": 2.3842598606630093e+17,
4
+ "train_loss": 0.43769028663635257,
5
+ "train_runtime": 134.3919,
6
+ "train_samples_per_second": 158.492,
7
+ "train_steps_per_second": 0.372
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.5166566371917725,
3
- "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-24",
4
  "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
  "global_step": 50,
@@ -10,308 +10,308 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.5714285714285714,
13
- "eval_accuracy": 0.8873239436619719,
14
- "eval_loss": 0.5373859405517578,
15
- "eval_runtime": 0.608,
16
- "eval_samples_per_second": 233.567,
17
- "eval_steps_per_second": 4.935,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 1.7142857142857144,
22
- "eval_accuracy": 0.8873239436619719,
23
- "eval_loss": 0.5290086269378662,
24
- "eval_runtime": 0.6285,
25
- "eval_samples_per_second": 225.924,
26
- "eval_steps_per_second": 4.773,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 2.857142857142857,
31
- "eval_accuracy": 0.8873239436619719,
32
- "eval_loss": 0.5365429520606995,
33
- "eval_runtime": 0.6154,
34
- "eval_samples_per_second": 230.751,
35
- "eval_steps_per_second": 4.875,
36
  "step": 5
37
  },
38
  {
39
  "epoch": 4.0,
40
  "eval_accuracy": 0.8802816901408451,
41
- "eval_loss": 0.5444374084472656,
42
- "eval_runtime": 0.6664,
43
- "eval_samples_per_second": 213.073,
44
- "eval_steps_per_second": 4.502,
45
  "step": 7
46
  },
47
  {
48
  "epoch": 4.571428571428571,
49
  "eval_accuracy": 0.8802816901408451,
50
- "eval_loss": 0.5448340773582458,
51
- "eval_runtime": 0.7088,
52
- "eval_samples_per_second": 200.338,
53
- "eval_steps_per_second": 4.232,
54
  "step": 8
55
  },
56
  {
57
  "epoch": 5.714285714285714,
58
- "grad_norm": 3.9558749198913574,
59
  "learning_rate": 5.882352941176471e-06,
60
- "loss": 0.4449,
61
  "step": 10
62
  },
63
  {
64
  "epoch": 5.714285714285714,
65
- "eval_accuracy": 0.8591549295774648,
66
- "eval_loss": 0.5697565078735352,
67
- "eval_runtime": 0.6385,
68
- "eval_samples_per_second": 222.381,
69
- "eval_steps_per_second": 4.698,
70
  "step": 10
71
  },
72
  {
73
  "epoch": 6.857142857142857,
74
- "eval_accuracy": 0.8661971830985915,
75
- "eval_loss": 0.5764577984809875,
76
- "eval_runtime": 0.6144,
77
- "eval_samples_per_second": 231.129,
78
- "eval_steps_per_second": 4.883,
79
  "step": 12
80
  },
81
  {
82
  "epoch": 8.0,
83
- "eval_accuracy": 0.8661971830985915,
84
- "eval_loss": 0.5411155223846436,
85
- "eval_runtime": 0.62,
86
- "eval_samples_per_second": 229.044,
87
- "eval_steps_per_second": 4.839,
88
  "step": 14
89
  },
90
  {
91
  "epoch": 8.571428571428571,
92
- "eval_accuracy": 0.8802816901408451,
93
- "eval_loss": 0.5282784104347229,
94
- "eval_runtime": 0.6438,
95
- "eval_samples_per_second": 220.561,
96
- "eval_steps_per_second": 4.66,
97
  "step": 15
98
  },
99
  {
100
  "epoch": 9.714285714285714,
101
  "eval_accuracy": 0.8802816901408451,
102
- "eval_loss": 0.5272403955459595,
103
- "eval_runtime": 0.7039,
104
- "eval_samples_per_second": 201.741,
105
- "eval_steps_per_second": 4.262,
106
  "step": 17
107
  },
108
  {
109
  "epoch": 10.857142857142858,
110
  "eval_accuracy": 0.8732394366197183,
111
- "eval_loss": 0.5336695313453674,
112
- "eval_runtime": 0.6047,
113
- "eval_samples_per_second": 234.826,
114
- "eval_steps_per_second": 4.961,
115
  "step": 19
116
  },
117
  {
118
  "epoch": 11.428571428571429,
119
- "grad_norm": 5.938806533813477,
120
  "learning_rate": 9.090909090909091e-06,
121
- "loss": 0.4343,
122
  "step": 20
123
  },
124
  {
125
  "epoch": 12.0,
126
  "eval_accuracy": 0.8732394366197183,
127
- "eval_loss": 0.5399956703186035,
128
- "eval_runtime": 0.7179,
129
- "eval_samples_per_second": 197.787,
130
- "eval_steps_per_second": 4.179,
131
  "step": 21
132
  },
133
  {
134
  "epoch": 12.571428571428571,
135
- "eval_accuracy": 0.8802816901408451,
136
- "eval_loss": 0.5316654443740845,
137
- "eval_runtime": 0.6548,
138
- "eval_samples_per_second": 216.861,
139
- "eval_steps_per_second": 4.582,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 13.714285714285714,
144
  "eval_accuracy": 0.8802816901408451,
145
- "eval_loss": 0.5166566371917725,
146
- "eval_runtime": 0.6957,
147
- "eval_samples_per_second": 204.108,
148
- "eval_steps_per_second": 4.312,
149
  "step": 24
150
  },
151
  {
152
  "epoch": 14.857142857142858,
153
  "eval_accuracy": 0.8732394366197183,
154
- "eval_loss": 0.5271970629692078,
155
- "eval_runtime": 0.6146,
156
- "eval_samples_per_second": 231.056,
157
- "eval_steps_per_second": 4.881,
158
  "step": 26
159
  },
160
  {
161
  "epoch": 16.0,
162
- "eval_accuracy": 0.8802816901408451,
163
- "eval_loss": 0.5361518859863281,
164
- "eval_runtime": 0.679,
165
- "eval_samples_per_second": 209.117,
166
- "eval_steps_per_second": 4.418,
167
  "step": 28
168
  },
169
  {
170
  "epoch": 16.571428571428573,
171
- "eval_accuracy": 0.8732394366197183,
172
- "eval_loss": 0.5353840589523315,
173
- "eval_runtime": 0.6496,
174
- "eval_samples_per_second": 218.579,
175
- "eval_steps_per_second": 4.618,
176
  "step": 29
177
  },
178
  {
179
  "epoch": 17.142857142857142,
180
- "grad_norm": 4.4233927726745605,
181
  "learning_rate": 6.060606060606061e-06,
182
- "loss": 0.4389,
183
  "step": 30
184
  },
185
  {
186
  "epoch": 17.714285714285715,
187
- "eval_accuracy": 0.8802816901408451,
188
- "eval_loss": 0.5263069868087769,
189
- "eval_runtime": 0.6615,
190
- "eval_samples_per_second": 214.668,
191
- "eval_steps_per_second": 4.535,
192
  "step": 31
193
  },
194
  {
195
  "epoch": 18.857142857142858,
196
- "eval_accuracy": 0.8732394366197183,
197
- "eval_loss": 0.5342370271682739,
198
- "eval_runtime": 0.6468,
199
- "eval_samples_per_second": 219.55,
200
- "eval_steps_per_second": 4.638,
201
  "step": 33
202
  },
203
  {
204
  "epoch": 20.0,
205
  "eval_accuracy": 0.8802816901408451,
206
- "eval_loss": 0.5446226596832275,
207
- "eval_runtime": 0.666,
208
- "eval_samples_per_second": 213.222,
209
- "eval_steps_per_second": 4.505,
210
  "step": 35
211
  },
212
  {
213
  "epoch": 20.571428571428573,
214
  "eval_accuracy": 0.8802816901408451,
215
- "eval_loss": 0.5494755506515503,
216
- "eval_runtime": 0.6103,
217
- "eval_samples_per_second": 232.659,
218
- "eval_steps_per_second": 4.915,
219
  "step": 36
220
  },
221
  {
222
  "epoch": 21.714285714285715,
223
  "eval_accuracy": 0.8802816901408451,
224
- "eval_loss": 0.5442414879798889,
225
- "eval_runtime": 0.6404,
226
- "eval_samples_per_second": 221.745,
227
- "eval_steps_per_second": 4.685,
228
  "step": 38
229
  },
230
  {
231
  "epoch": 22.857142857142858,
232
- "grad_norm": 3.910562038421631,
233
  "learning_rate": 3.0303030303030305e-06,
234
- "loss": 0.423,
235
  "step": 40
236
  },
237
  {
238
  "epoch": 22.857142857142858,
239
  "eval_accuracy": 0.8802816901408451,
240
- "eval_loss": 0.5322338938713074,
241
- "eval_runtime": 0.6332,
242
- "eval_samples_per_second": 224.25,
243
- "eval_steps_per_second": 4.738,
244
  "step": 40
245
  },
246
  {
247
  "epoch": 24.0,
248
  "eval_accuracy": 0.8802816901408451,
249
- "eval_loss": 0.5273756384849548,
250
- "eval_runtime": 0.6297,
251
- "eval_samples_per_second": 225.51,
252
- "eval_steps_per_second": 4.764,
253
  "step": 42
254
  },
255
  {
256
  "epoch": 24.571428571428573,
257
- "eval_accuracy": 0.8802816901408451,
258
- "eval_loss": 0.5295581817626953,
259
- "eval_runtime": 0.6259,
260
- "eval_samples_per_second": 226.871,
261
- "eval_steps_per_second": 4.793,
262
  "step": 43
263
  },
264
  {
265
  "epoch": 25.714285714285715,
266
- "eval_accuracy": 0.8802816901408451,
267
- "eval_loss": 0.5362545847892761,
268
- "eval_runtime": 0.7205,
269
- "eval_samples_per_second": 197.076,
270
- "eval_steps_per_second": 4.164,
271
  "step": 45
272
  },
273
  {
274
  "epoch": 26.857142857142858,
275
  "eval_accuracy": 0.8732394366197183,
276
- "eval_loss": 0.5421485900878906,
277
- "eval_runtime": 0.6367,
278
- "eval_samples_per_second": 223.028,
279
- "eval_steps_per_second": 4.712,
280
  "step": 47
281
  },
282
  {
283
  "epoch": 28.0,
284
  "eval_accuracy": 0.8732394366197183,
285
- "eval_loss": 0.5466616153717041,
286
- "eval_runtime": 0.6369,
287
- "eval_samples_per_second": 222.952,
288
- "eval_steps_per_second": 4.71,
289
  "step": 49
290
  },
291
  {
292
  "epoch": 28.571428571428573,
293
- "grad_norm": 4.531747817993164,
294
  "learning_rate": 0.0,
295
- "loss": 0.4254,
296
  "step": 50
297
  },
298
  {
299
  "epoch": 28.571428571428573,
300
- "eval_accuracy": 0.8732394366197183,
301
- "eval_loss": 0.5473288893699646,
302
- "eval_runtime": 0.7049,
303
- "eval_samples_per_second": 201.437,
304
- "eval_steps_per_second": 4.256,
305
  "step": 50
306
  },
307
  {
308
  "epoch": 28.571428571428573,
309
  "step": 50,
310
  "total_flos": 2.3842598606630093e+17,
311
- "train_loss": 0.4332954502105713,
312
- "train_runtime": 131.7536,
313
- "train_samples_per_second": 161.665,
314
- "train_steps_per_second": 0.379
315
  }
316
  ],
317
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.5122641324996948,
3
+ "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-15",
4
  "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
  "global_step": 50,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.5714285714285714,
13
+ "eval_accuracy": 0.8802816901408451,
14
+ "eval_loss": 0.5166566371917725,
15
+ "eval_runtime": 0.6288,
16
+ "eval_samples_per_second": 225.827,
17
+ "eval_steps_per_second": 4.771,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 1.7142857142857144,
22
+ "eval_accuracy": 0.8802816901408451,
23
+ "eval_loss": 0.519730806350708,
24
+ "eval_runtime": 0.6203,
25
+ "eval_samples_per_second": 228.939,
26
+ "eval_steps_per_second": 4.837,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 2.857142857142857,
31
+ "eval_accuracy": 0.8802816901408451,
32
+ "eval_loss": 0.5266488194465637,
33
+ "eval_runtime": 0.6213,
34
+ "eval_samples_per_second": 228.546,
35
+ "eval_steps_per_second": 4.828,
36
  "step": 5
37
  },
38
  {
39
  "epoch": 4.0,
40
  "eval_accuracy": 0.8802816901408451,
41
+ "eval_loss": 0.5390564203262329,
42
+ "eval_runtime": 0.6545,
43
+ "eval_samples_per_second": 216.969,
44
+ "eval_steps_per_second": 4.584,
45
  "step": 7
46
  },
47
  {
48
  "epoch": 4.571428571428571,
49
  "eval_accuracy": 0.8802816901408451,
50
+ "eval_loss": 0.5425485968589783,
51
+ "eval_runtime": 0.6176,
52
+ "eval_samples_per_second": 229.935,
53
+ "eval_steps_per_second": 4.858,
54
  "step": 8
55
  },
56
  {
57
  "epoch": 5.714285714285714,
58
+ "grad_norm": 5.448883056640625,
59
  "learning_rate": 5.882352941176471e-06,
60
+ "loss": 0.4435,
61
  "step": 10
62
  },
63
  {
64
  "epoch": 5.714285714285714,
65
+ "eval_accuracy": 0.8802816901408451,
66
+ "eval_loss": 0.5403485298156738,
67
+ "eval_runtime": 0.6202,
68
+ "eval_samples_per_second": 228.973,
69
+ "eval_steps_per_second": 4.837,
70
  "step": 10
71
  },
72
  {
73
  "epoch": 6.857142857142857,
74
+ "eval_accuracy": 0.8802816901408451,
75
+ "eval_loss": 0.5251158475875854,
76
+ "eval_runtime": 0.6735,
77
+ "eval_samples_per_second": 210.854,
78
+ "eval_steps_per_second": 4.455,
79
  "step": 12
80
  },
81
  {
82
  "epoch": 8.0,
83
+ "eval_accuracy": 0.8732394366197183,
84
+ "eval_loss": 0.5160439610481262,
85
+ "eval_runtime": 0.6272,
86
+ "eval_samples_per_second": 226.418,
87
+ "eval_steps_per_second": 4.783,
88
  "step": 14
89
  },
90
  {
91
  "epoch": 8.571428571428571,
92
+ "eval_accuracy": 0.8873239436619719,
93
+ "eval_loss": 0.5122641324996948,
94
+ "eval_runtime": 0.6632,
95
+ "eval_samples_per_second": 214.109,
96
+ "eval_steps_per_second": 4.523,
97
  "step": 15
98
  },
99
  {
100
  "epoch": 9.714285714285714,
101
  "eval_accuracy": 0.8802816901408451,
102
+ "eval_loss": 0.5291638374328613,
103
+ "eval_runtime": 0.7041,
104
+ "eval_samples_per_second": 201.678,
105
+ "eval_steps_per_second": 4.261,
106
  "step": 17
107
  },
108
  {
109
  "epoch": 10.857142857142858,
110
  "eval_accuracy": 0.8732394366197183,
111
+ "eval_loss": 0.568649172782898,
112
+ "eval_runtime": 0.6802,
113
+ "eval_samples_per_second": 208.773,
114
+ "eval_steps_per_second": 4.411,
115
  "step": 19
116
  },
117
  {
118
  "epoch": 11.428571428571429,
119
+ "grad_norm": 5.323070049285889,
120
  "learning_rate": 9.090909090909091e-06,
121
+ "loss": 0.4418,
122
  "step": 20
123
  },
124
  {
125
  "epoch": 12.0,
126
  "eval_accuracy": 0.8732394366197183,
127
+ "eval_loss": 0.5459948182106018,
128
+ "eval_runtime": 0.6906,
129
+ "eval_samples_per_second": 205.619,
130
+ "eval_steps_per_second": 4.344,
131
  "step": 21
132
  },
133
  {
134
  "epoch": 12.571428571428571,
135
+ "eval_accuracy": 0.8873239436619719,
136
+ "eval_loss": 0.5333032608032227,
137
+ "eval_runtime": 0.6327,
138
+ "eval_samples_per_second": 224.427,
139
+ "eval_steps_per_second": 4.741,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 13.714285714285714,
144
  "eval_accuracy": 0.8802816901408451,
145
+ "eval_loss": 0.5152425169944763,
146
+ "eval_runtime": 0.6626,
147
+ "eval_samples_per_second": 214.318,
148
+ "eval_steps_per_second": 4.528,
149
  "step": 24
150
  },
151
  {
152
  "epoch": 14.857142857142858,
153
  "eval_accuracy": 0.8732394366197183,
154
+ "eval_loss": 0.5236279964447021,
155
+ "eval_runtime": 0.6606,
156
+ "eval_samples_per_second": 214.94,
157
+ "eval_steps_per_second": 4.541,
158
  "step": 26
159
  },
160
  {
161
  "epoch": 16.0,
162
+ "eval_accuracy": 0.8591549295774648,
163
+ "eval_loss": 0.5372341275215149,
164
+ "eval_runtime": 0.6123,
165
+ "eval_samples_per_second": 231.928,
166
+ "eval_steps_per_second": 4.9,
167
  "step": 28
168
  },
169
  {
170
  "epoch": 16.571428571428573,
171
+ "eval_accuracy": 0.8591549295774648,
172
+ "eval_loss": 0.5472158193588257,
173
+ "eval_runtime": 0.6625,
174
+ "eval_samples_per_second": 214.333,
175
+ "eval_steps_per_second": 4.528,
176
  "step": 29
177
  },
178
  {
179
  "epoch": 17.142857142857142,
180
+ "grad_norm": 5.04396390914917,
181
  "learning_rate": 6.060606060606061e-06,
182
+ "loss": 0.4363,
183
  "step": 30
184
  },
185
  {
186
  "epoch": 17.714285714285715,
187
+ "eval_accuracy": 0.8591549295774648,
188
+ "eval_loss": 0.5421658754348755,
189
+ "eval_runtime": 0.6149,
190
+ "eval_samples_per_second": 230.945,
191
+ "eval_steps_per_second": 4.879,
192
  "step": 31
193
  },
194
  {
195
  "epoch": 18.857142857142858,
196
+ "eval_accuracy": 0.8802816901408451,
197
+ "eval_loss": 0.5293453335762024,
198
+ "eval_runtime": 0.6909,
199
+ "eval_samples_per_second": 205.543,
200
+ "eval_steps_per_second": 4.342,
201
  "step": 33
202
  },
203
  {
204
  "epoch": 20.0,
205
  "eval_accuracy": 0.8802816901408451,
206
+ "eval_loss": 0.523467481136322,
207
+ "eval_runtime": 0.6321,
208
+ "eval_samples_per_second": 224.663,
209
+ "eval_steps_per_second": 4.746,
210
  "step": 35
211
  },
212
  {
213
  "epoch": 20.571428571428573,
214
  "eval_accuracy": 0.8802816901408451,
215
+ "eval_loss": 0.523999810218811,
216
+ "eval_runtime": 0.6902,
217
+ "eval_samples_per_second": 205.737,
218
+ "eval_steps_per_second": 4.347,
219
  "step": 36
220
  },
221
  {
222
  "epoch": 21.714285714285715,
223
  "eval_accuracy": 0.8802816901408451,
224
+ "eval_loss": 0.5302459001541138,
225
+ "eval_runtime": 0.6908,
226
+ "eval_samples_per_second": 205.559,
227
+ "eval_steps_per_second": 4.343,
228
  "step": 38
229
  },
230
  {
231
  "epoch": 22.857142857142858,
232
+ "grad_norm": 5.993457794189453,
233
  "learning_rate": 3.0303030303030305e-06,
234
+ "loss": 0.4371,
235
  "step": 40
236
  },
237
  {
238
  "epoch": 22.857142857142858,
239
  "eval_accuracy": 0.8802816901408451,
240
+ "eval_loss": 0.5324126482009888,
241
+ "eval_runtime": 0.7047,
242
+ "eval_samples_per_second": 201.5,
243
+ "eval_steps_per_second": 4.257,
244
  "step": 40
245
  },
246
  {
247
  "epoch": 24.0,
248
  "eval_accuracy": 0.8802816901408451,
249
+ "eval_loss": 0.5349227786064148,
250
+ "eval_runtime": 0.6311,
251
+ "eval_samples_per_second": 225.0,
252
+ "eval_steps_per_second": 4.754,
253
  "step": 42
254
  },
255
  {
256
  "epoch": 24.571428571428573,
257
+ "eval_accuracy": 0.8732394366197183,
258
+ "eval_loss": 0.5362741351127625,
259
+ "eval_runtime": 0.6225,
260
+ "eval_samples_per_second": 228.126,
261
+ "eval_steps_per_second": 4.82,
262
  "step": 43
263
  },
264
  {
265
  "epoch": 25.714285714285715,
266
+ "eval_accuracy": 0.8732394366197183,
267
+ "eval_loss": 0.5341742634773254,
268
+ "eval_runtime": 0.6561,
269
+ "eval_samples_per_second": 216.426,
270
+ "eval_steps_per_second": 4.572,
271
  "step": 45
272
  },
273
  {
274
  "epoch": 26.857142857142858,
275
  "eval_accuracy": 0.8732394366197183,
276
+ "eval_loss": 0.5314902067184448,
277
+ "eval_runtime": 0.6626,
278
+ "eval_samples_per_second": 214.302,
279
+ "eval_steps_per_second": 4.528,
280
  "step": 47
281
  },
282
  {
283
  "epoch": 28.0,
284
  "eval_accuracy": 0.8732394366197183,
285
+ "eval_loss": 0.5318764448165894,
286
+ "eval_runtime": 0.6393,
287
+ "eval_samples_per_second": 222.13,
288
+ "eval_steps_per_second": 4.693,
289
  "step": 49
290
  },
291
  {
292
  "epoch": 28.571428571428573,
293
+ "grad_norm": 4.919415473937988,
294
  "learning_rate": 0.0,
295
+ "loss": 0.4298,
296
  "step": 50
297
  },
298
  {
299
  "epoch": 28.571428571428573,
300
+ "eval_accuracy": 0.8661971830985915,
301
+ "eval_loss": 0.5321409106254578,
302
+ "eval_runtime": 0.6159,
303
+ "eval_samples_per_second": 230.575,
304
+ "eval_steps_per_second": 4.871,
305
  "step": 50
306
  },
307
  {
308
  "epoch": 28.571428571428573,
309
  "step": 50,
310
  "total_flos": 2.3842598606630093e+17,
311
+ "train_loss": 0.43769028663635257,
312
+ "train_runtime": 134.3919,
313
+ "train_samples_per_second": 158.492,
314
+ "train_steps_per_second": 0.372
315
  }
316
  ],
317
  "logging_steps": 10,