rocassis committed on
Commit
95b81a5
·
verified ·
1 Parent(s): 8958da0

All Dunn!!!

Browse files
README.md CHANGED
@@ -14,6 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
  # trocr-large-printed-cmc7_tesseract_MICR_ocr
15
 
16
  This model is a fine-tuned version of [microsoft/trocr-large-printed](https://huggingface.co/microsoft/trocr-large-printed) on an unknown dataset.
 
 
 
17
 
18
  ## Model description
19
 
 
14
  # trocr-large-printed-cmc7_tesseract_MICR_ocr
15
 
16
  This model is a fine-tuned version of [microsoft/trocr-large-printed](https://huggingface.co/microsoft/trocr-large-printed) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.2143
19
+ - Cer: 0.0027
20
 
21
  ## Model description
22
 
generation_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
5
  "early_stopping": true,
 
1
  {
 
2
  "bos_token_id": 0,
3
  "decoder_start_token_id": 2,
4
  "early_stopping": true,
runs/Dec19_16-56-04_c5bc465dc86b/events.out.tfevents.1734636742.c5bc465dc86b.456.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab5d6756ee1e331651b0c84b0779dcc79cc5b3c384c9deea2fab0f902756b786
3
+ size 406
trainer_state.json ADDED
@@ -0,0 +1,749 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3120,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0016025641025641025,
13
+ "grad_norm": 213.62364196777344,
14
+ "learning_rate": 4.998397435897436e-05,
15
+ "loss": 9.3378,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.049679487179487176,
20
+ "grad_norm": 43.086360931396484,
21
+ "learning_rate": 4.950320512820513e-05,
22
+ "loss": 2.5252,
23
+ "step": 31
24
+ },
25
+ {
26
+ "epoch": 0.09935897435897435,
27
+ "grad_norm": 48.219451904296875,
28
+ "learning_rate": 4.9006410256410256e-05,
29
+ "loss": 1.5213,
30
+ "step": 62
31
+ },
32
+ {
33
+ "epoch": 0.14903846153846154,
34
+ "grad_norm": 30.23198127746582,
35
+ "learning_rate": 4.850961538461539e-05,
36
+ "loss": 1.9636,
37
+ "step": 93
38
+ },
39
+ {
40
+ "epoch": 0.1987179487179487,
41
+ "grad_norm": 52.96772766113281,
42
+ "learning_rate": 4.8012820512820516e-05,
43
+ "loss": 1.3545,
44
+ "step": 124
45
+ },
46
+ {
47
+ "epoch": 0.2483974358974359,
48
+ "grad_norm": 48.79069900512695,
49
+ "learning_rate": 4.751602564102564e-05,
50
+ "loss": 1.4998,
51
+ "step": 155
52
+ },
53
+ {
54
+ "epoch": 0.2980769230769231,
55
+ "grad_norm": 38.8032341003418,
56
+ "learning_rate": 4.701923076923077e-05,
57
+ "loss": 1.4654,
58
+ "step": 186
59
+ },
60
+ {
61
+ "epoch": 0.34775641025641024,
62
+ "grad_norm": 106.62429809570312,
63
+ "learning_rate": 4.65224358974359e-05,
64
+ "loss": 1.8027,
65
+ "step": 217
66
+ },
67
+ {
68
+ "epoch": 0.3974358974358974,
69
+ "grad_norm": 25.370744705200195,
70
+ "learning_rate": 4.602564102564102e-05,
71
+ "loss": 1.4394,
72
+ "step": 248
73
+ },
74
+ {
75
+ "epoch": 0.44711538461538464,
76
+ "grad_norm": 82.52824401855469,
77
+ "learning_rate": 4.5528846153846157e-05,
78
+ "loss": 1.4573,
79
+ "step": 279
80
+ },
81
+ {
82
+ "epoch": 0.4967948717948718,
83
+ "grad_norm": 19.148210525512695,
84
+ "learning_rate": 4.503205128205128e-05,
85
+ "loss": 1.2547,
86
+ "step": 310
87
+ },
88
+ {
89
+ "epoch": 0.5464743589743589,
90
+ "grad_norm": 46.984779357910156,
91
+ "learning_rate": 4.453525641025642e-05,
92
+ "loss": 1.3297,
93
+ "step": 341
94
+ },
95
+ {
96
+ "epoch": 0.5961538461538461,
97
+ "grad_norm": 45.50709915161133,
98
+ "learning_rate": 4.403846153846154e-05,
99
+ "loss": 1.3359,
100
+ "step": 372
101
+ },
102
+ {
103
+ "epoch": 0.6458333333333334,
104
+ "grad_norm": 36.45267105102539,
105
+ "learning_rate": 4.354166666666667e-05,
106
+ "loss": 1.2466,
107
+ "step": 403
108
+ },
109
+ {
110
+ "epoch": 0.6955128205128205,
111
+ "grad_norm": 20.8362979888916,
112
+ "learning_rate": 4.30448717948718e-05,
113
+ "loss": 1.1206,
114
+ "step": 434
115
+ },
116
+ {
117
+ "epoch": 0.7451923076923077,
118
+ "grad_norm": 64.35557556152344,
119
+ "learning_rate": 4.2548076923076924e-05,
120
+ "loss": 1.3676,
121
+ "step": 465
122
+ },
123
+ {
124
+ "epoch": 0.7948717948717948,
125
+ "grad_norm": 12.930615425109863,
126
+ "learning_rate": 4.205128205128206e-05,
127
+ "loss": 1.3871,
128
+ "step": 496
129
+ },
130
+ {
131
+ "epoch": 0.844551282051282,
132
+ "grad_norm": 51.6870002746582,
133
+ "learning_rate": 4.1554487179487184e-05,
134
+ "loss": 1.0038,
135
+ "step": 527
136
+ },
137
+ {
138
+ "epoch": 0.8942307692307693,
139
+ "grad_norm": 14.677131652832031,
140
+ "learning_rate": 4.105769230769231e-05,
141
+ "loss": 1.2283,
142
+ "step": 558
143
+ },
144
+ {
145
+ "epoch": 0.9439102564102564,
146
+ "grad_norm": 43.24152755737305,
147
+ "learning_rate": 4.056089743589744e-05,
148
+ "loss": 1.0872,
149
+ "step": 589
150
+ },
151
+ {
152
+ "epoch": 0.9935897435897436,
153
+ "grad_norm": 25.641794204711914,
154
+ "learning_rate": 4.006410256410257e-05,
155
+ "loss": 0.9991,
156
+ "step": 620
157
+ },
158
+ {
159
+ "epoch": 1.0432692307692308,
160
+ "grad_norm": 36.622703552246094,
161
+ "learning_rate": 3.956730769230769e-05,
162
+ "loss": 0.9941,
163
+ "step": 651
164
+ },
165
+ {
166
+ "epoch": 1.092948717948718,
167
+ "grad_norm": 17.249757766723633,
168
+ "learning_rate": 3.9070512820512824e-05,
169
+ "loss": 0.9877,
170
+ "step": 682
171
+ },
172
+ {
173
+ "epoch": 1.142628205128205,
174
+ "grad_norm": 31.591552734375,
175
+ "learning_rate": 3.857371794871795e-05,
176
+ "loss": 0.8917,
177
+ "step": 713
178
+ },
179
+ {
180
+ "epoch": 1.1923076923076923,
181
+ "grad_norm": 28.23312759399414,
182
+ "learning_rate": 3.807692307692308e-05,
183
+ "loss": 0.9264,
184
+ "step": 744
185
+ },
186
+ {
187
+ "epoch": 1.2419871794871795,
188
+ "grad_norm": 37.707183837890625,
189
+ "learning_rate": 3.7580128205128204e-05,
190
+ "loss": 0.849,
191
+ "step": 775
192
+ },
193
+ {
194
+ "epoch": 1.2916666666666667,
195
+ "grad_norm": 12.67361831665039,
196
+ "learning_rate": 3.708333333333334e-05,
197
+ "loss": 0.8255,
198
+ "step": 806
199
+ },
200
+ {
201
+ "epoch": 1.3413461538461537,
202
+ "grad_norm": 39.1461181640625,
203
+ "learning_rate": 3.658653846153846e-05,
204
+ "loss": 0.7372,
205
+ "step": 837
206
+ },
207
+ {
208
+ "epoch": 1.391025641025641,
209
+ "grad_norm": 33.27256393432617,
210
+ "learning_rate": 3.608974358974359e-05,
211
+ "loss": 0.8227,
212
+ "step": 868
213
+ },
214
+ {
215
+ "epoch": 1.4407051282051282,
216
+ "grad_norm": 545.9265747070312,
217
+ "learning_rate": 3.559294871794872e-05,
218
+ "loss": 0.7941,
219
+ "step": 899
220
+ },
221
+ {
222
+ "epoch": 1.4903846153846154,
223
+ "grad_norm": 23.55372428894043,
224
+ "learning_rate": 3.5096153846153845e-05,
225
+ "loss": 0.8686,
226
+ "step": 930
227
+ },
228
+ {
229
+ "epoch": 1.5400641025641026,
230
+ "grad_norm": 22.64493179321289,
231
+ "learning_rate": 3.459935897435898e-05,
232
+ "loss": 0.7059,
233
+ "step": 961
234
+ },
235
+ {
236
+ "epoch": 1.5897435897435899,
237
+ "grad_norm": 79.4326171875,
238
+ "learning_rate": 3.4102564102564105e-05,
239
+ "loss": 0.7567,
240
+ "step": 992
241
+ },
242
+ {
243
+ "epoch": 1.6394230769230769,
244
+ "grad_norm": 13.156254768371582,
245
+ "learning_rate": 3.360576923076923e-05,
246
+ "loss": 0.7138,
247
+ "step": 1023
248
+ },
249
+ {
250
+ "epoch": 1.689102564102564,
251
+ "grad_norm": 39.97475051879883,
252
+ "learning_rate": 3.310897435897436e-05,
253
+ "loss": 0.676,
254
+ "step": 1054
255
+ },
256
+ {
257
+ "epoch": 1.7387820512820513,
258
+ "grad_norm": 24.62538719177246,
259
+ "learning_rate": 3.261217948717949e-05,
260
+ "loss": 0.683,
261
+ "step": 1085
262
+ },
263
+ {
264
+ "epoch": 1.7884615384615383,
265
+ "grad_norm": 18.064680099487305,
266
+ "learning_rate": 3.211538461538462e-05,
267
+ "loss": 0.5746,
268
+ "step": 1116
269
+ },
270
+ {
271
+ "epoch": 1.8381410256410255,
272
+ "grad_norm": 10.56497573852539,
273
+ "learning_rate": 3.1618589743589746e-05,
274
+ "loss": 0.5881,
275
+ "step": 1147
276
+ },
277
+ {
278
+ "epoch": 1.8878205128205128,
279
+ "grad_norm": 43.007911682128906,
280
+ "learning_rate": 3.112179487179487e-05,
281
+ "loss": 0.78,
282
+ "step": 1178
283
+ },
284
+ {
285
+ "epoch": 1.9375,
286
+ "grad_norm": 25.748680114746094,
287
+ "learning_rate": 3.0625000000000006e-05,
288
+ "loss": 0.592,
289
+ "step": 1209
290
+ },
291
+ {
292
+ "epoch": 1.9871794871794872,
293
+ "grad_norm": 20.511089324951172,
294
+ "learning_rate": 3.012820512820513e-05,
295
+ "loss": 0.6807,
296
+ "step": 1240
297
+ },
298
+ {
299
+ "epoch": 2.0368589743589745,
300
+ "grad_norm": 26.275846481323242,
301
+ "learning_rate": 2.963141025641026e-05,
302
+ "loss": 0.6935,
303
+ "step": 1271
304
+ },
305
+ {
306
+ "epoch": 2.0865384615384617,
307
+ "grad_norm": 69.18566131591797,
308
+ "learning_rate": 2.913461538461539e-05,
309
+ "loss": 0.511,
310
+ "step": 1302
311
+ },
312
+ {
313
+ "epoch": 2.136217948717949,
314
+ "grad_norm": 4.620207786560059,
315
+ "learning_rate": 2.8637820512820513e-05,
316
+ "loss": 0.5338,
317
+ "step": 1333
318
+ },
319
+ {
320
+ "epoch": 2.185897435897436,
321
+ "grad_norm": 44.329566955566406,
322
+ "learning_rate": 2.8141025641025643e-05,
323
+ "loss": 0.4471,
324
+ "step": 1364
325
+ },
326
+ {
327
+ "epoch": 2.235576923076923,
328
+ "grad_norm": 24.95821762084961,
329
+ "learning_rate": 2.7644230769230773e-05,
330
+ "loss": 0.5295,
331
+ "step": 1395
332
+ },
333
+ {
334
+ "epoch": 2.28525641025641,
335
+ "grad_norm": 32.0108757019043,
336
+ "learning_rate": 2.7147435897435896e-05,
337
+ "loss": 0.5075,
338
+ "step": 1426
339
+ },
340
+ {
341
+ "epoch": 2.3349358974358974,
342
+ "grad_norm": 15.610992431640625,
343
+ "learning_rate": 2.6650641025641026e-05,
344
+ "loss": 0.4898,
345
+ "step": 1457
346
+ },
347
+ {
348
+ "epoch": 2.3846153846153846,
349
+ "grad_norm": 6.399534225463867,
350
+ "learning_rate": 2.6153846153846157e-05,
351
+ "loss": 0.4273,
352
+ "step": 1488
353
+ },
354
+ {
355
+ "epoch": 2.434294871794872,
356
+ "grad_norm": 22.35305404663086,
357
+ "learning_rate": 2.565705128205128e-05,
358
+ "loss": 0.4826,
359
+ "step": 1519
360
+ },
361
+ {
362
+ "epoch": 2.483974358974359,
363
+ "grad_norm": 26.82170867919922,
364
+ "learning_rate": 2.516025641025641e-05,
365
+ "loss": 0.4472,
366
+ "step": 1550
367
+ },
368
+ {
369
+ "epoch": 2.5336538461538463,
370
+ "grad_norm": 31.885419845581055,
371
+ "learning_rate": 2.466346153846154e-05,
372
+ "loss": 0.3986,
373
+ "step": 1581
374
+ },
375
+ {
376
+ "epoch": 2.5833333333333335,
377
+ "grad_norm": 21.020639419555664,
378
+ "learning_rate": 2.4166666666666667e-05,
379
+ "loss": 0.4856,
380
+ "step": 1612
381
+ },
382
+ {
383
+ "epoch": 2.6330128205128203,
384
+ "grad_norm": 10.991323471069336,
385
+ "learning_rate": 2.3669871794871794e-05,
386
+ "loss": 0.5008,
387
+ "step": 1643
388
+ },
389
+ {
390
+ "epoch": 2.6826923076923075,
391
+ "grad_norm": 17.077383041381836,
392
+ "learning_rate": 2.3173076923076924e-05,
393
+ "loss": 0.3585,
394
+ "step": 1674
395
+ },
396
+ {
397
+ "epoch": 2.7323717948717947,
398
+ "grad_norm": 6.274226188659668,
399
+ "learning_rate": 2.2676282051282054e-05,
400
+ "loss": 0.4101,
401
+ "step": 1705
402
+ },
403
+ {
404
+ "epoch": 2.782051282051282,
405
+ "grad_norm": 7.574007511138916,
406
+ "learning_rate": 2.217948717948718e-05,
407
+ "loss": 0.3604,
408
+ "step": 1736
409
+ },
410
+ {
411
+ "epoch": 2.831730769230769,
412
+ "grad_norm": 15.08122730255127,
413
+ "learning_rate": 2.168269230769231e-05,
414
+ "loss": 0.407,
415
+ "step": 1767
416
+ },
417
+ {
418
+ "epoch": 2.8814102564102564,
419
+ "grad_norm": 32.56257247924805,
420
+ "learning_rate": 2.1185897435897437e-05,
421
+ "loss": 0.4002,
422
+ "step": 1798
423
+ },
424
+ {
425
+ "epoch": 2.9310897435897436,
426
+ "grad_norm": 34.24089431762695,
427
+ "learning_rate": 2.0689102564102564e-05,
428
+ "loss": 0.429,
429
+ "step": 1829
430
+ },
431
+ {
432
+ "epoch": 2.980769230769231,
433
+ "grad_norm": 5.287622928619385,
434
+ "learning_rate": 2.0192307692307694e-05,
435
+ "loss": 0.352,
436
+ "step": 1860
437
+ },
438
+ {
439
+ "epoch": 3.030448717948718,
440
+ "grad_norm": 33.49539566040039,
441
+ "learning_rate": 1.969551282051282e-05,
442
+ "loss": 0.3348,
443
+ "step": 1891
444
+ },
445
+ {
446
+ "epoch": 3.0801282051282053,
447
+ "grad_norm": 3.116619825363159,
448
+ "learning_rate": 1.919871794871795e-05,
449
+ "loss": 0.2613,
450
+ "step": 1922
451
+ },
452
+ {
453
+ "epoch": 3.1298076923076925,
454
+ "grad_norm": 5.825009346008301,
455
+ "learning_rate": 1.8701923076923078e-05,
456
+ "loss": 0.279,
457
+ "step": 1953
458
+ },
459
+ {
460
+ "epoch": 3.1794871794871793,
461
+ "grad_norm": 2.2447965145111084,
462
+ "learning_rate": 1.8205128205128204e-05,
463
+ "loss": 0.3405,
464
+ "step": 1984
465
+ },
466
+ {
467
+ "epoch": 3.2291666666666665,
468
+ "grad_norm": 6.297438144683838,
469
+ "learning_rate": 1.7708333333333335e-05,
470
+ "loss": 0.3294,
471
+ "step": 2015
472
+ },
473
+ {
474
+ "epoch": 3.2788461538461537,
475
+ "grad_norm": 17.42154884338379,
476
+ "learning_rate": 1.721153846153846e-05,
477
+ "loss": 0.296,
478
+ "step": 2046
479
+ },
480
+ {
481
+ "epoch": 3.328525641025641,
482
+ "grad_norm": 11.834112167358398,
483
+ "learning_rate": 1.6714743589743588e-05,
484
+ "loss": 0.3767,
485
+ "step": 2077
486
+ },
487
+ {
488
+ "epoch": 3.378205128205128,
489
+ "grad_norm": 2.2308290004730225,
490
+ "learning_rate": 1.6217948717948718e-05,
491
+ "loss": 0.2352,
492
+ "step": 2108
493
+ },
494
+ {
495
+ "epoch": 3.4278846153846154,
496
+ "grad_norm": 13.97610855102539,
497
+ "learning_rate": 1.5721153846153845e-05,
498
+ "loss": 0.2937,
499
+ "step": 2139
500
+ },
501
+ {
502
+ "epoch": 3.4775641025641026,
503
+ "grad_norm": 4.005247116088867,
504
+ "learning_rate": 1.5224358974358973e-05,
505
+ "loss": 0.283,
506
+ "step": 2170
507
+ },
508
+ {
509
+ "epoch": 3.52724358974359,
510
+ "grad_norm": 5.180654525756836,
511
+ "learning_rate": 1.4727564102564103e-05,
512
+ "loss": 0.2773,
513
+ "step": 2201
514
+ },
515
+ {
516
+ "epoch": 3.5769230769230766,
517
+ "grad_norm": 6.487146854400635,
518
+ "learning_rate": 1.423076923076923e-05,
519
+ "loss": 0.2665,
520
+ "step": 2232
521
+ },
522
+ {
523
+ "epoch": 3.626602564102564,
524
+ "grad_norm": 2.202457904815674,
525
+ "learning_rate": 1.373397435897436e-05,
526
+ "loss": 0.2541,
527
+ "step": 2263
528
+ },
529
+ {
530
+ "epoch": 3.676282051282051,
531
+ "grad_norm": 47.195499420166016,
532
+ "learning_rate": 1.3237179487179489e-05,
533
+ "loss": 0.2362,
534
+ "step": 2294
535
+ },
536
+ {
537
+ "epoch": 3.7259615384615383,
538
+ "grad_norm": 21.26088523864746,
539
+ "learning_rate": 1.2740384615384615e-05,
540
+ "loss": 0.2331,
541
+ "step": 2325
542
+ },
543
+ {
544
+ "epoch": 3.7756410256410255,
545
+ "grad_norm": 16.77073860168457,
546
+ "learning_rate": 1.2243589743589744e-05,
547
+ "loss": 0.2134,
548
+ "step": 2356
549
+ },
550
+ {
551
+ "epoch": 3.8253205128205128,
552
+ "grad_norm": 2.246936559677124,
553
+ "learning_rate": 1.1746794871794872e-05,
554
+ "loss": 0.2437,
555
+ "step": 2387
556
+ },
557
+ {
558
+ "epoch": 3.875,
559
+ "grad_norm": 1.4183766841888428,
560
+ "learning_rate": 1.125e-05,
561
+ "loss": 0.2012,
562
+ "step": 2418
563
+ },
564
+ {
565
+ "epoch": 3.9246794871794872,
566
+ "grad_norm": 3.9523444175720215,
567
+ "learning_rate": 1.0753205128205129e-05,
568
+ "loss": 0.2204,
569
+ "step": 2449
570
+ },
571
+ {
572
+ "epoch": 3.9743589743589745,
573
+ "grad_norm": 3.564764976501465,
574
+ "learning_rate": 1.0256410256410256e-05,
575
+ "loss": 0.2034,
576
+ "step": 2480
577
+ },
578
+ {
579
+ "epoch": 4.024038461538462,
580
+ "grad_norm": 1.7849295139312744,
581
+ "learning_rate": 9.759615384615384e-06,
582
+ "loss": 0.1819,
583
+ "step": 2511
584
+ },
585
+ {
586
+ "epoch": 4.073717948717949,
587
+ "grad_norm": 1.7893842458724976,
588
+ "learning_rate": 9.262820512820514e-06,
589
+ "loss": 0.1585,
590
+ "step": 2542
591
+ },
592
+ {
593
+ "epoch": 4.123397435897436,
594
+ "grad_norm": 3.4071810245513916,
595
+ "learning_rate": 8.766025641025641e-06,
596
+ "loss": 0.1596,
597
+ "step": 2573
598
+ },
599
+ {
600
+ "epoch": 4.173076923076923,
601
+ "grad_norm": 28.766258239746094,
602
+ "learning_rate": 8.26923076923077e-06,
603
+ "loss": 0.1563,
604
+ "step": 2604
605
+ },
606
+ {
607
+ "epoch": 4.222756410256411,
608
+ "grad_norm": 3.066229820251465,
609
+ "learning_rate": 7.772435897435898e-06,
610
+ "loss": 0.1612,
611
+ "step": 2635
612
+ },
613
+ {
614
+ "epoch": 4.272435897435898,
615
+ "grad_norm": 1.4539713859558105,
616
+ "learning_rate": 7.275641025641026e-06,
617
+ "loss": 0.151,
618
+ "step": 2666
619
+ },
620
+ {
621
+ "epoch": 4.322115384615385,
622
+ "grad_norm": 3.9048240184783936,
623
+ "learning_rate": 6.778846153846154e-06,
624
+ "loss": 0.1514,
625
+ "step": 2697
626
+ },
627
+ {
628
+ "epoch": 4.371794871794872,
629
+ "grad_norm": 1.2496715784072876,
630
+ "learning_rate": 6.282051282051282e-06,
631
+ "loss": 0.1632,
632
+ "step": 2728
633
+ },
634
+ {
635
+ "epoch": 4.421474358974359,
636
+ "grad_norm": 1.516411542892456,
637
+ "learning_rate": 5.785256410256411e-06,
638
+ "loss": 0.1497,
639
+ "step": 2759
640
+ },
641
+ {
642
+ "epoch": 4.471153846153846,
643
+ "grad_norm": 2.217137575149536,
644
+ "learning_rate": 5.288461538461538e-06,
645
+ "loss": 0.1514,
646
+ "step": 2790
647
+ },
648
+ {
649
+ "epoch": 4.520833333333333,
650
+ "grad_norm": 1.7278540134429932,
651
+ "learning_rate": 4.791666666666667e-06,
652
+ "loss": 0.1409,
653
+ "step": 2821
654
+ },
655
+ {
656
+ "epoch": 4.57051282051282,
657
+ "grad_norm": 2.3950905799865723,
658
+ "learning_rate": 4.294871794871795e-06,
659
+ "loss": 0.1524,
660
+ "step": 2852
661
+ },
662
+ {
663
+ "epoch": 4.6201923076923075,
664
+ "grad_norm": 2.2806148529052734,
665
+ "learning_rate": 3.798076923076923e-06,
666
+ "loss": 0.1498,
667
+ "step": 2883
668
+ },
669
+ {
670
+ "epoch": 4.669871794871795,
671
+ "grad_norm": 2.3029630184173584,
672
+ "learning_rate": 3.3012820512820517e-06,
673
+ "loss": 0.1471,
674
+ "step": 2914
675
+ },
676
+ {
677
+ "epoch": 4.719551282051282,
678
+ "grad_norm": 1.592606782913208,
679
+ "learning_rate": 2.8044871794871797e-06,
680
+ "loss": 0.14,
681
+ "step": 2945
682
+ },
683
+ {
684
+ "epoch": 4.769230769230769,
685
+ "grad_norm": 3.534292221069336,
686
+ "learning_rate": 2.307692307692308e-06,
687
+ "loss": 0.1426,
688
+ "step": 2976
689
+ },
690
+ {
691
+ "epoch": 4.818910256410256,
692
+ "grad_norm": 2.344146966934204,
693
+ "learning_rate": 1.810897435897436e-06,
694
+ "loss": 0.1399,
695
+ "step": 3007
696
+ },
697
+ {
698
+ "epoch": 4.868589743589744,
699
+ "grad_norm": 11.425804138183594,
700
+ "learning_rate": 1.3141025641025643e-06,
701
+ "loss": 0.1374,
702
+ "step": 3038
703
+ },
704
+ {
705
+ "epoch": 4.918269230769231,
706
+ "grad_norm": 2.704406499862671,
707
+ "learning_rate": 8.173076923076924e-07,
708
+ "loss": 0.1389,
709
+ "step": 3069
710
+ },
711
+ {
712
+ "epoch": 4.967948717948718,
713
+ "grad_norm": 6.8166728019714355,
714
+ "learning_rate": 3.205128205128205e-07,
715
+ "loss": 0.1721,
716
+ "step": 3100
717
+ },
718
+ {
719
+ "epoch": 5.0,
720
+ "step": 3120,
721
+ "total_flos": 1.845867535870722e+19,
722
+ "train_loss": 0.6135548925552613,
723
+ "train_runtime": 6226.8155,
724
+ "train_samples_per_second": 2.003,
725
+ "train_steps_per_second": 0.501
726
+ }
727
+ ],
728
+ "logging_steps": 31,
729
+ "max_steps": 3120,
730
+ "num_input_tokens_seen": 0,
731
+ "num_train_epochs": 5,
732
+ "save_steps": 500,
733
+ "stateful_callbacks": {
734
+ "TrainerControl": {
735
+ "args": {
736
+ "should_epoch_stop": false,
737
+ "should_evaluate": false,
738
+ "should_log": false,
739
+ "should_save": true,
740
+ "should_training_stop": true
741
+ },
742
+ "attributes": {}
743
+ }
744
+ },
745
+ "total_flos": 1.845867535870722e+19,
746
+ "train_batch_size": 4,
747
+ "trial_name": null,
748
+ "trial_params": null
749
+ }