Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8c3b7354bdafaf40f8c3750e79685a988156602c01307cad8150c8d3191850d
 size 289512208

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc7f9f12a7d975eceff199ac96f1fb4abb58016ab42da13fe25b1845a1e0b3ce
 size 289512208

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:062ebcb0ddbe2ed847f0c77f0c46ff28b6fe4af7fe26bb63c3feadce3f4f1df6
 size 147781972

 version https://git-lfs.github.com/spec/v1
+oid sha256:37a24bd2239b20338e528442d83d2139315c8017ad79ac6456a27ebc2a7c4982
 size 147781972

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cd9da745e01d9c9e271b256eba61ac6e1f1f956439fe5a27252af92b2c1936d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:28ace9fc649252ea1299cd2d9b1953184b717d1b1778bd2d51cf81f8fdd857fb
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:409f5f9e71c66aad357639b3334ffe5097e507a7b7ddeed673d739f3f24cfba8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:14de197ce4fca667a77214b11d375124cfec5ed9c075fb60180e734827aaa864
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2162970304489136,
-  "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.031782606968336576,
   "eval_steps": 100,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3555,6 +3555,714 @@
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3583,7 +4291,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.59811262332928e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2089511156082153,
+  "best_model_checkpoint": "miner_id_24/checkpoint-600",
+  "epoch": 0.03813912836200389,
   "eval_steps": 100,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 500
+    },
+    {
+      "epoch": 0.03184617218227325,
+      "grad_norm": 0.26652681827545166,
+      "learning_rate": 7.162871012503003e-05,
+      "loss": 1.2841,
+      "step": 501
+    },
+    {
+      "epoch": 0.031909737396209926,
+      "grad_norm": 0.258722722530365,
+      "learning_rate": 7.126596213658488e-05,
+      "loss": 1.3076,
+      "step": 502
+    },
+    {
+      "epoch": 0.031973302610146595,
+      "grad_norm": 0.2519145607948303,
+      "learning_rate": 7.090362580911808e-05,
+      "loss": 1.3191,
+      "step": 503
+    },
+    {
+      "epoch": 0.03203686782408327,
+      "grad_norm": 0.24408216774463654,
+      "learning_rate": 7.05417063336761e-05,
+      "loss": 1.2056,
+      "step": 504
+    },
+    {
+      "epoch": 0.032100433038019946,
+      "grad_norm": 0.25859251618385315,
+      "learning_rate": 7.018020889533348e-05,
+      "loss": 1.2509,
+      "step": 505
+    },
+    {
+      "epoch": 0.032163998251956614,
+      "grad_norm": 0.274295836687088,
+      "learning_rate": 6.981913867311819e-05,
+      "loss": 1.1457,
+      "step": 506
+    },
+    {
+      "epoch": 0.03222756346589329,
+      "grad_norm": 0.25392991304397583,
+      "learning_rate": 6.945850083993781e-05,
+      "loss": 1.1389,
+      "step": 507
+    },
+    {
+      "epoch": 0.032291128679829965,
+      "grad_norm": 0.2518613338470459,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 1.1288,
+      "step": 508
+    },
+    {
+      "epoch": 0.032354693893766634,
+      "grad_norm": 0.25153687596321106,
+      "learning_rate": 6.873854300126467e-05,
+      "loss": 1.1106,
+      "step": 509
+    },
+    {
+      "epoch": 0.03241825910770331,
+      "grad_norm": 0.2594717741012573,
+      "learning_rate": 6.83792333103176e-05,
+      "loss": 1.2556,
+      "step": 510
+    },
+    {
+      "epoch": 0.032481824321639985,
+      "grad_norm": 0.2567812204360962,
+      "learning_rate": 6.802037663734923e-05,
+      "loss": 1.1773,
+      "step": 511
+    },
+    {
+      "epoch": 0.03254538953557665,
+      "grad_norm": 0.2533416748046875,
+      "learning_rate": 6.766197812355438e-05,
+      "loss": 1.2053,
+      "step": 512
+    },
+    {
+      "epoch": 0.03260895474951333,
+      "grad_norm": 0.2625131607055664,
+      "learning_rate": 6.73040429035641e-05,
+      "loss": 1.2467,
+      "step": 513
+    },
+    {
+      "epoch": 0.032672519963450004,
+      "grad_norm": 0.2665756344795227,
+      "learning_rate": 6.69465761053721e-05,
+      "loss": 1.2737,
+      "step": 514
+    },
+    {
+      "epoch": 0.03273608517738667,
+      "grad_norm": 0.2558302581310272,
+      "learning_rate": 6.658958285026102e-05,
+      "loss": 1.2684,
+      "step": 515
+    },
+    {
+      "epoch": 0.03279965039132335,
+      "grad_norm": 0.24979649484157562,
+      "learning_rate": 6.623306825272937e-05,
+      "loss": 1.2144,
+      "step": 516
+    },
+    {
+      "epoch": 0.032863215605260024,
+      "grad_norm": 0.25010988116264343,
+      "learning_rate": 6.58770374204181e-05,
+      "loss": 1.2679,
+      "step": 517
+    },
+    {
+      "epoch": 0.03292678081919669,
+      "grad_norm": 0.2494756430387497,
+      "learning_rate": 6.552149545403739e-05,
+      "loss": 1.2291,
+      "step": 518
+    },
+    {
+      "epoch": 0.03299034603313337,
+      "grad_norm": 0.2528434097766876,
+      "learning_rate": 6.516644744729367e-05,
+      "loss": 1.2519,
+      "step": 519
+    },
+    {
+      "epoch": 0.03305391124707004,
+      "grad_norm": 0.2631024718284607,
+      "learning_rate": 6.48118984868167e-05,
+      "loss": 1.2086,
+      "step": 520
+    },
+    {
+      "epoch": 0.03311747646100671,
+      "grad_norm": 0.2514025866985321,
+      "learning_rate": 6.445785365208645e-05,
+      "loss": 1.2098,
+      "step": 521
+    },
+    {
+      "epoch": 0.03318104167494339,
+      "grad_norm": 0.25096845626831055,
+      "learning_rate": 6.410431801536058e-05,
+      "loss": 1.2062,
+      "step": 522
+    },
+    {
+      "epoch": 0.03324460688888006,
+      "grad_norm": 0.24552412331104279,
+      "learning_rate": 6.375129664160168e-05,
+      "loss": 1.139,
+      "step": 523
+    },
+    {
+      "epoch": 0.03330817210281673,
+      "grad_norm": 0.257235586643219,
+      "learning_rate": 6.339879458840465e-05,
+      "loss": 1.3102,
+      "step": 524
+    },
+    {
+      "epoch": 0.03337173731675341,
+      "grad_norm": 0.24831882119178772,
+      "learning_rate": 6.304681690592431e-05,
+      "loss": 1.1848,
+      "step": 525
+    },
+    {
+      "epoch": 0.03343530253069008,
+      "grad_norm": 0.2522595524787903,
+      "learning_rate": 6.269536863680307e-05,
+      "loss": 1.228,
+      "step": 526
+    },
+    {
+      "epoch": 0.03349886774462675,
+      "grad_norm": 0.25006258487701416,
+      "learning_rate": 6.23444548160986e-05,
+      "loss": 1.1059,
+      "step": 527
+    },
+    {
+      "epoch": 0.033562432958563426,
+      "grad_norm": 0.2498662769794464,
+      "learning_rate": 6.199408047121174e-05,
+      "loss": 1.2354,
+      "step": 528
+    },
+    {
+      "epoch": 0.0336259981725001,
+      "grad_norm": 0.25638020038604736,
+      "learning_rate": 6.16442506218146e-05,
+      "loss": 1.2332,
+      "step": 529
+    },
+    {
+      "epoch": 0.03368956338643677,
+      "grad_norm": 0.26172903180122375,
+      "learning_rate": 6.129497027977829e-05,
+      "loss": 1.2062,
+      "step": 530
+    },
+    {
+      "epoch": 0.033753128600373446,
+      "grad_norm": 0.2451334148645401,
+      "learning_rate": 6.0946244449101574e-05,
+      "loss": 1.2513,
+      "step": 531
+    },
+    {
+      "epoch": 0.03381669381431012,
+      "grad_norm": 0.2550620436668396,
+      "learning_rate": 6.059807812583883e-05,
+      "loss": 1.08,
+      "step": 532
+    },
+    {
+      "epoch": 0.03388025902824679,
+      "grad_norm": 0.2467305064201355,
+      "learning_rate": 6.02504762980286e-05,
+      "loss": 1.1822,
+      "step": 533
+    },
+    {
+      "epoch": 0.033943824242183465,
+      "grad_norm": 0.2539975345134735,
+      "learning_rate": 5.990344394562226e-05,
+      "loss": 1.2334,
+      "step": 534
+    },
+    {
+      "epoch": 0.03400738945612014,
+      "grad_norm": 0.26874592900276184,
+      "learning_rate": 5.955698604041231e-05,
+      "loss": 1.2736,
+      "step": 535
+    },
+    {
+      "epoch": 0.03407095467005681,
+      "grad_norm": 0.25802844762802124,
+      "learning_rate": 5.92111075459616e-05,
+      "loss": 1.1959,
+      "step": 536
+    },
+    {
+      "epoch": 0.034134519883993485,
+      "grad_norm": 0.2507290542125702,
+      "learning_rate": 5.88658134175319e-05,
+      "loss": 1.2045,
+      "step": 537
+    },
+    {
+      "epoch": 0.03419808509793016,
+      "grad_norm": 0.25168654322624207,
+      "learning_rate": 5.852110860201294e-05,
+      "loss": 1.259,
+      "step": 538
+    },
+    {
+      "epoch": 0.03426165031186683,
+      "grad_norm": 0.25015729665756226,
+      "learning_rate": 5.817699803785174e-05,
+      "loss": 1.0865,
+      "step": 539
+    },
+    {
+      "epoch": 0.034325215525803504,
+      "grad_norm": 0.24665945768356323,
+      "learning_rate": 5.7833486654981606e-05,
+      "loss": 1.1366,
+      "step": 540
+    },
+    {
+      "epoch": 0.03438878073974018,
+      "grad_norm": 0.2521713972091675,
+      "learning_rate": 5.7490579374751686e-05,
+      "loss": 1.2052,
+      "step": 541
+    },
+    {
+      "epoch": 0.03445234595367685,
+      "grad_norm": 0.24369041621685028,
+      "learning_rate": 5.714828110985635e-05,
+      "loss": 1.1564,
+      "step": 542
+    },
+    {
+      "epoch": 0.034515911167613524,
+      "grad_norm": 0.2600708603858948,
+      "learning_rate": 5.6806596764264874e-05,
+      "loss": 1.1852,
+      "step": 543
+    },
+    {
+      "epoch": 0.0345794763815502,
+      "grad_norm": 0.25693967938423157,
+      "learning_rate": 5.6465531233151126e-05,
+      "loss": 1.1887,
+      "step": 544
+    },
+    {
+      "epoch": 0.03464304159548687,
+      "grad_norm": 0.24953435361385345,
+      "learning_rate": 5.6125089402823485e-05,
+      "loss": 1.0897,
+      "step": 545
+    },
+    {
+      "epoch": 0.03470660680942354,
+      "grad_norm": 0.2587718665599823,
+      "learning_rate": 5.578527615065492e-05,
+      "loss": 1.1345,
+      "step": 546
+    },
+    {
+      "epoch": 0.03477017202336022,
+      "grad_norm": 0.2567615807056427,
+      "learning_rate": 5.544609634501279e-05,
+      "loss": 1.277,
+      "step": 547
+    },
+    {
+      "epoch": 0.03483373723729689,
+      "grad_norm": 0.25938692688941956,
+      "learning_rate": 5.510755484518955e-05,
+      "loss": 1.2087,
+      "step": 548
+    },
+    {
+      "epoch": 0.03489730245123356,
+      "grad_norm": 0.25982430577278137,
+      "learning_rate": 5.476965650133279e-05,
+      "loss": 1.2554,
+      "step": 549
+    },
+    {
+      "epoch": 0.03496086766517024,
+      "grad_norm": 0.25441205501556396,
+      "learning_rate": 5.443240615437586e-05,
+      "loss": 1.1437,
+      "step": 550
+    },
+    {
+      "epoch": 0.035024432879106907,
+      "grad_norm": 0.2553006410598755,
+      "learning_rate": 5.4095808635968546e-05,
+      "loss": 1.2544,
+      "step": 551
+    },
+    {
+      "epoch": 0.03508799809304358,
+      "grad_norm": 0.2520417273044586,
+      "learning_rate": 5.375986876840784e-05,
+      "loss": 1.0661,
+      "step": 552
+    },
+    {
+      "epoch": 0.03515156330698026,
+      "grad_norm": 0.2597469091415405,
+      "learning_rate": 5.342459136456881e-05,
+      "loss": 1.1732,
+      "step": 553
+    },
+    {
+      "epoch": 0.035215128520916926,
+      "grad_norm": 0.2597144544124603,
+      "learning_rate": 5.30899812278356e-05,
+      "loss": 1.2128,
+      "step": 554
+    },
+    {
+      "epoch": 0.0352786937348536,
+      "grad_norm": 0.2565470039844513,
+      "learning_rate": 5.275604315203293e-05,
+      "loss": 1.1585,
+      "step": 555
+    },
+    {
+      "epoch": 0.03534225894879028,
+      "grad_norm": 0.24314232170581818,
+      "learning_rate": 5.2422781921356826e-05,
+      "loss": 1.0955,
+      "step": 556
+    },
+    {
+      "epoch": 0.035405824162726945,
+      "grad_norm": 0.24443262815475464,
+      "learning_rate": 5.209020231030672e-05,
+      "loss": 1.1649,
+      "step": 557
+    },
+    {
+      "epoch": 0.03546938937666362,
+      "grad_norm": 0.26083680987358093,
+      "learning_rate": 5.1758309083616673e-05,
+      "loss": 1.2195,
+      "step": 558
+    },
+    {
+      "epoch": 0.035532954590600296,
+      "grad_norm": 0.25181084871292114,
+      "learning_rate": 5.142710699618701e-05,
+      "loss": 1.2411,
+      "step": 559
+    },
+    {
+      "epoch": 0.035596519804536965,
+      "grad_norm": 0.24913780391216278,
+      "learning_rate": 5.109660079301668e-05,
+      "loss": 1.2251,
+      "step": 560
+    },
+    {
+      "epoch": 0.03566008501847364,
+      "grad_norm": 0.2578226625919342,
+      "learning_rate": 5.076679520913479e-05,
+      "loss": 1.1685,
+      "step": 561
+    },
+    {
+      "epoch": 0.035723650232410316,
+      "grad_norm": 0.25115758180618286,
+      "learning_rate": 5.043769496953299e-05,
+      "loss": 1.0909,
+      "step": 562
+    },
+    {
+      "epoch": 0.035787215446346984,
+      "grad_norm": 0.24539780616760254,
+      "learning_rate": 5.010930478909779e-05,
+      "loss": 1.221,
+      "step": 563
+    },
+    {
+      "epoch": 0.03585078066028366,
+      "grad_norm": 0.26085364818573,
+      "learning_rate": 4.9781629372542895e-05,
+      "loss": 1.2141,
+      "step": 564
+    },
+    {
+      "epoch": 0.035914345874220335,
+      "grad_norm": 0.25367042422294617,
+      "learning_rate": 4.945467341434195e-05,
+      "loss": 1.2999,
+      "step": 565
+    },
+    {
+      "epoch": 0.035977911088157004,
+      "grad_norm": 0.27679672837257385,
+      "learning_rate": 4.912844159866112e-05,
+      "loss": 1.2494,
+      "step": 566
+    },
+    {
+      "epoch": 0.03604147630209368,
+      "grad_norm": 0.2423914223909378,
+      "learning_rate": 4.880293859929227e-05,
+      "loss": 1.1681,
+      "step": 567
+    },
+    {
+      "epoch": 0.036105041516030355,
+      "grad_norm": 0.24873730540275574,
+      "learning_rate": 4.847816907958549e-05,
+      "loss": 1.2964,
+      "step": 568
+    },
+    {
+      "epoch": 0.03616860672996702,
+      "grad_norm": 0.2618268132209778,
+      "learning_rate": 4.8154137692382907e-05,
+      "loss": 1.2184,
+      "step": 569
+    },
+    {
+      "epoch": 0.0362321719439037,
+      "grad_norm": 0.2559436857700348,
+      "learning_rate": 4.783084907995156e-05,
+      "loss": 1.216,
+      "step": 570
+    },
+    {
+      "epoch": 0.036295737157840374,
+      "grad_norm": 0.2557474970817566,
+      "learning_rate": 4.750830787391708e-05,
+      "loss": 1.2993,
+      "step": 571
+    },
+    {
+      "epoch": 0.03635930237177704,
+      "grad_norm": 0.25075972080230713,
+      "learning_rate": 4.718651869519731e-05,
+      "loss": 1.1178,
+      "step": 572
+    },
+    {
+      "epoch": 0.03642286758571372,
+      "grad_norm": 0.2534898817539215,
+      "learning_rate": 4.686548615393613e-05,
+      "loss": 1.1891,
+      "step": 573
+    },
+    {
+      "epoch": 0.036486432799650394,
+      "grad_norm": 0.26913774013519287,
+      "learning_rate": 4.654521484943735e-05,
+      "loss": 1.2552,
+      "step": 574
+    },
+    {
+      "epoch": 0.03654999801358706,
+      "grad_norm": 0.2566869556903839,
+      "learning_rate": 4.622570937009879e-05,
+      "loss": 1.3527,
+      "step": 575
+    },
+    {
+      "epoch": 0.03661356322752374,
+      "grad_norm": 0.26296791434288025,
+      "learning_rate": 4.59069742933468e-05,
+      "loss": 1.2464,
+      "step": 576
+    },
+    {
+      "epoch": 0.03667712844146041,
+      "grad_norm": 0.261929988861084,
+      "learning_rate": 4.558901418557021e-05,
+      "loss": 1.2744,
+      "step": 577
+    },
+    {
+      "epoch": 0.03674069365539708,
+      "grad_norm": 0.252572238445282,
+      "learning_rate": 4.527183360205541e-05,
+      "loss": 1.112,
+      "step": 578
+    },
+    {
+      "epoch": 0.03680425886933376,
+      "grad_norm": 0.27232858538627625,
+      "learning_rate": 4.495543708692075e-05,
+      "loss": 1.2743,
+      "step": 579
+    },
+    {
+      "epoch": 0.03686782408327043,
+      "grad_norm": 0.2609007656574249,
+      "learning_rate": 4.4639829173051554e-05,
+      "loss": 1.3188,
+      "step": 580
+    },
+    {
+      "epoch": 0.0369313892972071,
+      "grad_norm": 0.2632231116294861,
+      "learning_rate": 4.43250143820352e-05,
+      "loss": 1.1243,
+      "step": 581
+    },
+    {
+      "epoch": 0.03699495451114378,
+      "grad_norm": 0.2551668882369995,
+      "learning_rate": 4.401099722409631e-05,
+      "loss": 1.1864,
+      "step": 582
+    },
+    {
+      "epoch": 0.03705851972508045,
+      "grad_norm": 0.2566946744918823,
+      "learning_rate": 4.369778219803211e-05,
+      "loss": 1.2117,
+      "step": 583
+    },
+    {
+      "epoch": 0.03712208493901712,
+      "grad_norm": 0.2648017704486847,
+      "learning_rate": 4.338537379114801e-05,
+      "loss": 1.2357,
+      "step": 584
+    },
+    {
+      "epoch": 0.037185650152953796,
+      "grad_norm": 0.24553461372852325,
+      "learning_rate": 4.307377647919343e-05,
+      "loss": 1.1774,
+      "step": 585
+    },
+    {
+      "epoch": 0.03724921536689047,
+      "grad_norm": 0.24831490218639374,
+      "learning_rate": 4.2762994726297346e-05,
+      "loss": 1.1336,
+      "step": 586
+    },
+    {
+      "epoch": 0.03731278058082714,
+      "grad_norm": 0.2644321024417877,
+      "learning_rate": 4.245303298490467e-05,
+      "loss": 1.1865,
+      "step": 587
+    },
+    {
+      "epoch": 0.037376345794763816,
+      "grad_norm": 0.2654249370098114,
+      "learning_rate": 4.2143895695712444e-05,
+      "loss": 1.1872,
+      "step": 588
+    },
+    {
+      "epoch": 0.03743991100870049,
+      "grad_norm": 0.24722780287265778,
+      "learning_rate": 4.183558728760586e-05,
+      "loss": 1.1609,
+      "step": 589
+    },
+    {
+      "epoch": 0.03750347622263716,
+      "grad_norm": 0.2560960054397583,
+      "learning_rate": 4.152811217759529e-05,
+      "loss": 1.1897,
+      "step": 590
+    },
+    {
+      "epoch": 0.037567041436573835,
+      "grad_norm": 0.24647943675518036,
+      "learning_rate": 4.12214747707527e-05,
+      "loss": 1.1098,
+      "step": 591
+    },
+    {
+      "epoch": 0.03763060665051051,
+      "grad_norm": 0.2763408422470093,
+      "learning_rate": 4.091567946014858e-05,
+      "loss": 1.2313,
+      "step": 592
+    },
+    {
+      "epoch": 0.03769417186444718,
+      "grad_norm": 0.254727303981781,
+      "learning_rate": 4.061073062678912e-05,
+      "loss": 1.2363,
+      "step": 593
+    },
+    {
+      "epoch": 0.037757737078383855,
+      "grad_norm": 0.2722640037536621,
+      "learning_rate": 4.0306632639553323e-05,
+      "loss": 1.1633,
+      "step": 594
+    },
+    {
+      "epoch": 0.03782130229232053,
+      "grad_norm": 0.2594556212425232,
+      "learning_rate": 4.000338985513046e-05,
+      "loss": 1.224,
+      "step": 595
+    },
+    {
+      "epoch": 0.0378848675062572,
+      "grad_norm": 0.25068074464797974,
+      "learning_rate": 3.970100661795766e-05,
+      "loss": 1.1809,
+      "step": 596
+    },
+    {
+      "epoch": 0.037948432720193874,
+      "grad_norm": 0.2550009787082672,
+      "learning_rate": 3.9399487260157766e-05,
+      "loss": 1.2022,
+      "step": 597
+    },
+    {
+      "epoch": 0.03801199793413055,
+      "grad_norm": 0.2593897879123688,
+      "learning_rate": 3.909883610147696e-05,
+      "loss": 1.3124,
+      "step": 598
+    },
+    {
+      "epoch": 0.03807556314806722,
+      "grad_norm": 0.2558402717113495,
+      "learning_rate": 3.879905744922329e-05,
+      "loss": 1.1618,
+      "step": 599
+    },
+    {
+      "epoch": 0.03813912836200389,
+      "grad_norm": 0.25934746861457825,
+      "learning_rate": 3.8500155598204644e-05,
+      "loss": 1.0083,
+      "step": 600
+    },
+    {
+      "epoch": 0.03813912836200389,
+      "eval_loss": 1.2089511156082153,
+      "eval_runtime": 1239.249,
+      "eval_samples_per_second": 4.035,
+      "eval_steps_per_second": 1.009,
+      "step": 600
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.117735147995136e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null