Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d74c6baef789560733fa946e38e74b2be06c7078b3ed14eee73ce7d022c1f477
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:05cf8c3cc02150b9ab839cae3cff4b0ac64e7418f3e3a7dd8342967d4ff228b4
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ecba67370d7c63ceae60f4c992bd6ccfa6bc9a5a3bb399885e20e13d29eeda4
 size 71078228

 version https://git-lfs.github.com/spec/v1
+oid sha256:a68f0691f92e4878c405d72e98f58a608b5a0d8eca69ca0d44a1a7af1fa2eb61
 size 71078228

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c816665e8f875bf4084e64860734b6df57cbda2abaa223f5013be7e39890f6a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:01e258152773e0c47fade2e9491672a9b92ef4bd4076bdedcd0ad5798e0cf3fc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1766536235809326,
   "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.36488027366020526,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 16.427,
       "eval_steps_per_second": 4.111,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2861,7 +3569,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.35716487528448e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.1766536235809326,
   "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.45610034207525657,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.427,
       "eval_steps_per_second": 4.111,
       "step": 400
+    },
+    {
+      "epoch": 0.36579247434435574,
+      "grad_norm": 0.12196308374404907,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 1.0011,
+      "step": 401
+    },
+    {
+      "epoch": 0.3667046750285063,
+      "grad_norm": 0.13182106614112854,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 1.1752,
+      "step": 402
+    },
+    {
+      "epoch": 0.3676168757126568,
+      "grad_norm": 0.14486649632453918,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 1.1526,
+      "step": 403
+    },
+    {
+      "epoch": 0.3685290763968073,
+      "grad_norm": 0.14698417484760284,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.2639,
+      "step": 404
+    },
+    {
+      "epoch": 0.3694412770809578,
+      "grad_norm": 0.16532011330127716,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 1.3237,
+      "step": 405
+    },
+    {
+      "epoch": 0.3703534777651083,
+      "grad_norm": 0.17021730542182922,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 1.3305,
+      "step": 406
+    },
+    {
+      "epoch": 0.37126567844925884,
+      "grad_norm": 0.1707400232553482,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 1.1334,
+      "step": 407
+    },
+    {
+      "epoch": 0.37217787913340933,
+      "grad_norm": 0.1890312284231186,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 1.2203,
+      "step": 408
+    },
+    {
+      "epoch": 0.37309007981755987,
+      "grad_norm": 0.2785188555717468,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 1.3662,
+      "step": 409
+    },
+    {
+      "epoch": 0.37400228050171036,
+      "grad_norm": 0.3217642307281494,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 1.5266,
+      "step": 410
+    },
+    {
+      "epoch": 0.3749144811858609,
+      "grad_norm": 0.637722909450531,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 1.7853,
+      "step": 411
+    },
+    {
+      "epoch": 0.3758266818700114,
+      "grad_norm": 0.15114322304725647,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.8234,
+      "step": 412
+    },
+    {
+      "epoch": 0.37673888255416194,
+      "grad_norm": 0.15969091653823853,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 1.0191,
+      "step": 413
+    },
+    {
+      "epoch": 0.3776510832383124,
+      "grad_norm": 0.16418735682964325,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 1.1389,
+      "step": 414
+    },
+    {
+      "epoch": 0.37856328392246297,
+      "grad_norm": 0.15018145740032196,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 1.0788,
+      "step": 415
+    },
+    {
+      "epoch": 0.37947548460661346,
+      "grad_norm": 0.14109903573989868,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.9719,
+      "step": 416
+    },
+    {
+      "epoch": 0.38038768529076394,
+      "grad_norm": 0.1540416181087494,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 1.0884,
+      "step": 417
+    },
+    {
+      "epoch": 0.3812998859749145,
+      "grad_norm": 0.14207249879837036,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 1.1356,
+      "step": 418
+    },
+    {
+      "epoch": 0.382212086659065,
+      "grad_norm": 0.14175021648406982,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 0.8289,
+      "step": 419
+    },
+    {
+      "epoch": 0.3831242873432155,
+      "grad_norm": 0.1370278298854828,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.9297,
+      "step": 420
+    },
+    {
+      "epoch": 0.384036488027366,
+      "grad_norm": 0.2763122618198395,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 1.2414,
+      "step": 421
+    },
+    {
+      "epoch": 0.38494868871151655,
+      "grad_norm": 0.15884122252464294,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 1.0608,
+      "step": 422
+    },
+    {
+      "epoch": 0.38586088939566704,
+      "grad_norm": 0.14970403909683228,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 0.7528,
+      "step": 423
+    },
+    {
+      "epoch": 0.3867730900798176,
+      "grad_norm": 0.15861846506595612,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 0.924,
+      "step": 424
+    },
+    {
+      "epoch": 0.38768529076396807,
+      "grad_norm": 0.18967342376708984,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 1.0738,
+      "step": 425
+    },
+    {
+      "epoch": 0.38859749144811856,
+      "grad_norm": 0.1890667825937271,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 1.0767,
+      "step": 426
+    },
+    {
+      "epoch": 0.3895096921322691,
+      "grad_norm": 0.18360556662082672,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 1.137,
+      "step": 427
+    },
+    {
+      "epoch": 0.3904218928164196,
+      "grad_norm": 0.2094428986310959,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.0618,
+      "step": 428
+    },
+    {
+      "epoch": 0.39133409350057013,
+      "grad_norm": 0.17591425776481628,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 0.9315,
+      "step": 429
+    },
+    {
+      "epoch": 0.3922462941847206,
+      "grad_norm": 0.19204486906528473,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 1.1151,
+      "step": 430
+    },
+    {
+      "epoch": 0.39315849486887117,
+      "grad_norm": 0.19108223915100098,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 0.9007,
+      "step": 431
+    },
+    {
+      "epoch": 0.39407069555302165,
+      "grad_norm": 0.24487371742725372,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 1.0956,
+      "step": 432
+    },
+    {
+      "epoch": 0.3949828962371722,
+      "grad_norm": 0.36626699566841125,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 1.1817,
+      "step": 433
+    },
+    {
+      "epoch": 0.3958950969213227,
+      "grad_norm": 0.20722968876361847,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 1.1693,
+      "step": 434
+    },
+    {
+      "epoch": 0.39680729760547323,
+      "grad_norm": 0.18895329535007477,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 1.2137,
+      "step": 435
+    },
+    {
+      "epoch": 0.3977194982896237,
+      "grad_norm": 0.11202345043420792,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.1175,
+      "step": 436
+    },
+    {
+      "epoch": 0.3986316989737742,
+      "grad_norm": 0.10380493104457855,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 0.9969,
+      "step": 437
+    },
+    {
+      "epoch": 0.39954389965792475,
+      "grad_norm": 0.10471412539482117,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 1.1972,
+      "step": 438
+    },
+    {
+      "epoch": 0.40045610034207524,
+      "grad_norm": 0.10842544585466385,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 1.233,
+      "step": 439
+    },
+    {
+      "epoch": 0.4013683010262258,
+      "grad_norm": 0.1057286411523819,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 1.1387,
+      "step": 440
+    },
+    {
+      "epoch": 0.40228050171037627,
+      "grad_norm": 0.13041551411151886,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 1.3469,
+      "step": 441
+    },
+    {
+      "epoch": 0.4031927023945268,
+      "grad_norm": 0.14460065960884094,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 1.2273,
+      "step": 442
+    },
+    {
+      "epoch": 0.4041049030786773,
+      "grad_norm": 0.1310551017522812,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 1.2728,
+      "step": 443
+    },
+    {
+      "epoch": 0.40501710376282785,
+      "grad_norm": 0.15493756532669067,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.2911,
+      "step": 444
+    },
+    {
+      "epoch": 0.40592930444697833,
+      "grad_norm": 0.17276257276535034,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 1.2539,
+      "step": 445
+    },
+    {
+      "epoch": 0.4068415051311288,
+      "grad_norm": 0.19491133093833923,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 1.3042,
+      "step": 446
+    },
+    {
+      "epoch": 0.40775370581527937,
+      "grad_norm": 0.2031175196170807,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 1.3051,
+      "step": 447
+    },
+    {
+      "epoch": 0.40866590649942985,
+      "grad_norm": 0.24767906963825226,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 1.3451,
+      "step": 448
+    },
+    {
+      "epoch": 0.4095781071835804,
+      "grad_norm": 0.3026193380355835,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 1.4116,
+      "step": 449
+    },
+    {
+      "epoch": 0.4104903078677309,
+      "grad_norm": 0.8892145156860352,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 1.6982,
+      "step": 450
+    },
+    {
+      "epoch": 0.41140250855188143,
+      "grad_norm": 0.13387838006019592,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 1.0503,
+      "step": 451
+    },
+    {
+      "epoch": 0.4123147092360319,
+      "grad_norm": 0.12145627290010452,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.9952,
+      "step": 452
+    },
+    {
+      "epoch": 0.41322690992018246,
+      "grad_norm": 0.14256002008914948,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 1.0721,
+      "step": 453
+    },
+    {
+      "epoch": 0.41413911060433295,
+      "grad_norm": 0.13277588784694672,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 1.0023,
+      "step": 454
+    },
+    {
+      "epoch": 0.4150513112884835,
+      "grad_norm": 0.14663711190223694,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 1.335,
+      "step": 455
+    },
+    {
+      "epoch": 0.415963511972634,
+      "grad_norm": 0.1930977702140808,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 1.212,
+      "step": 456
+    },
+    {
+      "epoch": 0.41687571265678447,
+      "grad_norm": 0.19310270249843597,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 1.281,
+      "step": 457
+    },
+    {
+      "epoch": 0.417787913340935,
+      "grad_norm": 0.20942994952201843,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 1.5858,
+      "step": 458
+    },
+    {
+      "epoch": 0.4187001140250855,
+      "grad_norm": 0.1741369217634201,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 1.1413,
+      "step": 459
+    },
+    {
+      "epoch": 0.41961231470923605,
+      "grad_norm": 0.2890353500843048,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.5824,
+      "step": 460
+    },
+    {
+      "epoch": 0.42052451539338653,
+      "grad_norm": 0.38743922114372253,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 1.2338,
+      "step": 461
+    },
+    {
+      "epoch": 0.4214367160775371,
+      "grad_norm": 0.4166823923587799,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 1.4114,
+      "step": 462
+    },
+    {
+      "epoch": 0.42234891676168757,
+      "grad_norm": 0.16984394192695618,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 1.033,
+      "step": 463
+    },
+    {
+      "epoch": 0.4232611174458381,
+      "grad_norm": 0.1662890464067459,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 1.1955,
+      "step": 464
+    },
+    {
+      "epoch": 0.4241733181299886,
+      "grad_norm": 0.15266531705856323,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 1.0023,
+      "step": 465
+    },
+    {
+      "epoch": 0.4250855188141391,
+      "grad_norm": 0.13052251935005188,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 0.7769,
+      "step": 466
+    },
+    {
+      "epoch": 0.42599771949828963,
+      "grad_norm": 0.15434938669204712,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 0.9081,
+      "step": 467
+    },
+    {
+      "epoch": 0.4269099201824401,
+      "grad_norm": 0.1454930305480957,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.8577,
+      "step": 468
+    },
+    {
+      "epoch": 0.42782212086659066,
+      "grad_norm": 0.14353904128074646,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 0.9633,
+      "step": 469
+    },
+    {
+      "epoch": 0.42873432155074115,
+      "grad_norm": 0.136052668094635,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.8172,
+      "step": 470
+    },
+    {
+      "epoch": 0.4296465222348917,
+      "grad_norm": 0.1676524579524994,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 1.0447,
+      "step": 471
+    },
+    {
+      "epoch": 0.4305587229190422,
+      "grad_norm": 0.2036374807357788,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 0.9286,
+      "step": 472
+    },
+    {
+      "epoch": 0.4314709236031927,
+      "grad_norm": 0.14977683126926422,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 0.9408,
+      "step": 473
+    },
+    {
+      "epoch": 0.4323831242873432,
+      "grad_norm": 0.1696736216545105,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 0.9405,
+      "step": 474
+    },
+    {
+      "epoch": 0.43329532497149376,
+      "grad_norm": 0.1618494838476181,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 1.0186,
+      "step": 475
+    },
+    {
+      "epoch": 0.43420752565564424,
+      "grad_norm": 0.19542407989501953,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.9891,
+      "step": 476
+    },
+    {
+      "epoch": 0.43511972633979473,
+      "grad_norm": 0.1792437881231308,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 0.786,
+      "step": 477
+    },
+    {
+      "epoch": 0.4360319270239453,
+      "grad_norm": 0.19154495000839233,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 1.026,
+      "step": 478
+    },
+    {
+      "epoch": 0.43694412770809576,
+      "grad_norm": 0.19262973964214325,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 1.1393,
+      "step": 479
+    },
+    {
+      "epoch": 0.4378563283922463,
+      "grad_norm": 0.19536390900611877,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 1.0381,
+      "step": 480
+    },
+    {
+      "epoch": 0.4387685290763968,
+      "grad_norm": 0.19628259539604187,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 0.9981,
+      "step": 481
+    },
+    {
+      "epoch": 0.43968072976054734,
+      "grad_norm": 0.19776402413845062,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 1.0142,
+      "step": 482
+    },
+    {
+      "epoch": 0.44059293044469783,
+      "grad_norm": 0.21788012981414795,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 0.9793,
+      "step": 483
+    },
+    {
+      "epoch": 0.44150513112884837,
+      "grad_norm": 0.2997768521308899,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.0695,
+      "step": 484
+    },
+    {
+      "epoch": 0.44241733181299886,
+      "grad_norm": 0.27953919768333435,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 1.3182,
+      "step": 485
+    },
+    {
+      "epoch": 0.44332953249714935,
+      "grad_norm": 0.15639732778072357,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 1.0899,
+      "step": 486
+    },
+    {
+      "epoch": 0.4442417331812999,
+      "grad_norm": 0.12085293978452682,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 1.3242,
+      "step": 487
+    },
+    {
+      "epoch": 0.4451539338654504,
+      "grad_norm": 0.105677030980587,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 1.2878,
+      "step": 488
+    },
+    {
+      "epoch": 0.4460661345496009,
+      "grad_norm": 0.12019108980894089,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 1.076,
+      "step": 489
+    },
+    {
+      "epoch": 0.4469783352337514,
+      "grad_norm": 0.12178989499807358,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.2949,
+      "step": 490
+    },
+    {
+      "epoch": 0.44789053591790196,
+      "grad_norm": 0.13249683380126953,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 1.4278,
+      "step": 491
+    },
+    {
+      "epoch": 0.44880273660205244,
+      "grad_norm": 0.13811670243740082,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 1.3262,
+      "step": 492
+    },
+    {
+      "epoch": 0.449714937286203,
+      "grad_norm": 0.14182856678962708,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 1.3477,
+      "step": 493
+    },
+    {
+      "epoch": 0.4506271379703535,
+      "grad_norm": 0.1499001681804657,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 1.2677,
+      "step": 494
+    },
+    {
+      "epoch": 0.45153933865450396,
+      "grad_norm": 0.16916415095329285,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 1.3193,
+      "step": 495
+    },
+    {
+      "epoch": 0.4524515393386545,
+      "grad_norm": 0.20377494394779205,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 1.357,
+      "step": 496
+    },
+    {
+      "epoch": 0.453363740022805,
+      "grad_norm": 0.2306216061115265,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 1.7053,
+      "step": 497
+    },
+    {
+      "epoch": 0.45427594070695554,
+      "grad_norm": 0.2522677481174469,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 1.3653,
+      "step": 498
+    },
+    {
+      "epoch": 0.455188141391106,
+      "grad_norm": 0.33547061681747437,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 1.4946,
+      "step": 499
+    },
+    {
+      "epoch": 0.45610034207525657,
+      "grad_norm": 0.8273468017578125,
+      "learning_rate": 0.0,
+      "loss": 1.7554,
+      "step": 500
+    },
+    {
+      "epoch": 0.45610034207525657,
+      "eval_loss": 1.195978045463562,
+      "eval_runtime": 113.8887,
+      "eval_samples_per_second": 16.209,
+      "eval_steps_per_second": 4.057,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.202037058732032e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null