Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4c219c18214b213cee8d94f449b995fe32c8aa64dc76561b1c68851c62fc101
 size 144805440

 version https://git-lfs.github.com/spec/v1
+oid sha256:30bed0be07b2d262a7c9c2ee8a0b78c224c223f1dfe6648616e1f608c414714b
 size 144805440

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36ad63dcd673a060cd68ac548d5829ddf424ad566d880bcca00537cdefa09546
 size 74292308

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2c1169ba7d2c7f4cb773678dd77d250d9458b9f604eb63ec725d7237ee35516
 size 74292308

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58b76951fb636bce8b43199b98ae5224c4e9385da07a8a11997d1f9ff34c8cf3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:557756899814dbd279052b049fe3fe1a6703f1aa93f393d968ec951e02adc041
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3995ed3f96ce0da04c996d47152b968f30c76db746cafcaa4522fce0739b111
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfd113e2bf8a699da59a6073bb1bcb18aa32fb9667d59893641ceb623ba2aa2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2362135648727417,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.03813912836200389,
   "eval_steps": 100,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2139,6 +2139,714 @@
       "eval_samples_per_second": 3.977,
       "eval_steps_per_second": 0.994,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2167,7 +2875,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.1092000681885696e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2253398895263672,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.05085217114933852,
   "eval_steps": 100,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.977,
       "eval_steps_per_second": 0.994,
       "step": 300
+    },
+    {
+      "epoch": 0.03826625878987724,
+      "grad_norm": 0.19571375846862793,
+      "learning_rate": 8.843335487049712e-05,
+      "loss": 1.2259,
+      "step": 301
+    },
+    {
+      "epoch": 0.03839338921775059,
+      "grad_norm": 0.19907177984714508,
+      "learning_rate": 8.785781316514841e-05,
+      "loss": 1.1946,
+      "step": 302
+    },
+    {
+      "epoch": 0.03852051964562393,
+      "grad_norm": 0.19650404155254364,
+      "learning_rate": 8.728267940034078e-05,
+      "loss": 1.1977,
+      "step": 303
+    },
+    {
+      "epoch": 0.038647650073497276,
+      "grad_norm": 0.1937037855386734,
+      "learning_rate": 8.670797289881915e-05,
+      "loss": 1.1719,
+      "step": 304
+    },
+    {
+      "epoch": 0.03877478050137063,
+      "grad_norm": 0.2028638869524002,
+      "learning_rate": 8.61337129689737e-05,
+      "loss": 1.224,
+      "step": 305
+    },
+    {
+      "epoch": 0.03890191092924397,
+      "grad_norm": 0.19181466102600098,
+      "learning_rate": 8.555991890419117e-05,
+      "loss": 1.2375,
+      "step": 306
+    },
+    {
+      "epoch": 0.039029041357117315,
+      "grad_norm": 0.1887066513299942,
+      "learning_rate": 8.498660998220669e-05,
+      "loss": 1.1786,
+      "step": 307
+    },
+    {
+      "epoch": 0.039156171784990666,
+      "grad_norm": 0.18820159137248993,
+      "learning_rate": 8.441380546445603e-05,
+      "loss": 1.2536,
+      "step": 308
+    },
+    {
+      "epoch": 0.03928330221286401,
+      "grad_norm": 0.18667809665203094,
+      "learning_rate": 8.384152459542848e-05,
+      "loss": 1.2834,
+      "step": 309
+    },
+    {
+      "epoch": 0.039410432640737354,
+      "grad_norm": 0.19369632005691528,
+      "learning_rate": 8.326978660202034e-05,
+      "loss": 1.2989,
+      "step": 310
+    },
+    {
+      "epoch": 0.039537563068610705,
+      "grad_norm": 0.18864746391773224,
+      "learning_rate": 8.269861069288903e-05,
+      "loss": 1.292,
+      "step": 311
+    },
+    {
+      "epoch": 0.03966469349648405,
+      "grad_norm": 0.19906029105186462,
+      "learning_rate": 8.212801605780753e-05,
+      "loss": 1.2855,
+      "step": 312
+    },
+    {
+      "epoch": 0.03979182392435739,
+      "grad_norm": 0.19700987637043,
+      "learning_rate": 8.155802186701984e-05,
+      "loss": 1.1771,
+      "step": 313
+    },
+    {
+      "epoch": 0.039918954352230744,
+      "grad_norm": 0.20134317874908447,
+      "learning_rate": 8.098864727059685e-05,
+      "loss": 1.1995,
+      "step": 314
+    },
+    {
+      "epoch": 0.04004608478010409,
+      "grad_norm": 0.1883343607187271,
+      "learning_rate": 8.04199113977929e-05,
+      "loss": 1.2433,
+      "step": 315
+    },
+    {
+      "epoch": 0.04017321520797743,
+      "grad_norm": 0.19041708111763,
+      "learning_rate": 7.985183335640331e-05,
+      "loss": 1.2538,
+      "step": 316
+    },
+    {
+      "epoch": 0.04030034563585078,
+      "grad_norm": 0.1838679164648056,
+      "learning_rate": 7.928443223212215e-05,
+      "loss": 1.2025,
+      "step": 317
+    },
+    {
+      "epoch": 0.04042747606372413,
+      "grad_norm": 0.19493237137794495,
+      "learning_rate": 7.871772708790114e-05,
+      "loss": 1.2553,
+      "step": 318
+    },
+    {
+      "epoch": 0.04055460649159747,
+      "grad_norm": 0.197859525680542,
+      "learning_rate": 7.815173696330919e-05,
+      "loss": 1.2661,
+      "step": 319
+    },
+    {
+      "epoch": 0.04068173691947082,
+      "grad_norm": 0.19427183270454407,
+      "learning_rate": 7.758648087389277e-05,
+      "loss": 1.2121,
+      "step": 320
+    },
+    {
+      "epoch": 0.040808867347344166,
+      "grad_norm": 0.19236573576927185,
+      "learning_rate": 7.702197781053696e-05,
+      "loss": 1.2375,
+      "step": 321
+    },
+    {
+      "epoch": 0.04093599777521751,
+      "grad_norm": 0.19856838881969452,
+      "learning_rate": 7.645824673882748e-05,
+      "loss": 1.2648,
+      "step": 322
+    },
+    {
+      "epoch": 0.04106312820309086,
+      "grad_norm": 0.20721471309661865,
+      "learning_rate": 7.589530659841349e-05,
+      "loss": 1.2503,
+      "step": 323
+    },
+    {
+      "epoch": 0.041190258630964205,
+      "grad_norm": 0.19413287937641144,
+      "learning_rate": 7.533317630237117e-05,
+      "loss": 1.265,
+      "step": 324
+    },
+    {
+      "epoch": 0.04131738905883755,
+      "grad_norm": 0.1948065459728241,
+      "learning_rate": 7.477187473656853e-05,
+      "loss": 1.2581,
+      "step": 325
+    },
+    {
+      "epoch": 0.0414445194867109,
+      "grad_norm": 0.19630448520183563,
+      "learning_rate": 7.421142075903067e-05,
+      "loss": 1.2013,
+      "step": 326
+    },
+    {
+      "epoch": 0.041571649914584244,
+      "grad_norm": 0.18867121636867523,
+      "learning_rate": 7.365183319930635e-05,
+      "loss": 1.1628,
+      "step": 327
+    },
+    {
+      "epoch": 0.04169878034245759,
+      "grad_norm": 0.2017098367214203,
+      "learning_rate": 7.309313085783524e-05,
+      "loss": 1.1882,
+      "step": 328
+    },
+    {
+      "epoch": 0.04182591077033094,
+      "grad_norm": 0.19574840366840363,
+      "learning_rate": 7.253533250531656e-05,
+      "loss": 1.1917,
+      "step": 329
+    },
+    {
+      "epoch": 0.04195304119820428,
+      "grad_norm": 0.2003111094236374,
+      "learning_rate": 7.197845688207805e-05,
+      "loss": 1.3069,
+      "step": 330
+    },
+    {
+      "epoch": 0.04208017162607763,
+      "grad_norm": 0.19444699585437775,
+      "learning_rate": 7.142252269744665e-05,
+      "loss": 1.1623,
+      "step": 331
+    },
+    {
+      "epoch": 0.04220730205395098,
+      "grad_norm": 0.19306592643260956,
+      "learning_rate": 7.086754862911982e-05,
+      "loss": 1.2512,
+      "step": 332
+    },
+    {
+      "epoch": 0.04233443248182432,
+      "grad_norm": 0.1940678060054779,
+      "learning_rate": 7.031355332253795e-05,
+      "loss": 1.2404,
+      "step": 333
+    },
+    {
+      "epoch": 0.042461562909697666,
+      "grad_norm": 0.20104342699050903,
+      "learning_rate": 6.976055539025818e-05,
+      "loss": 1.1826,
+      "step": 334
+    },
+    {
+      "epoch": 0.04258869333757102,
+      "grad_norm": 0.20509012043476105,
+      "learning_rate": 6.92085734113288e-05,
+      "loss": 1.2247,
+      "step": 335
+    },
+    {
+      "epoch": 0.04271582376544436,
+      "grad_norm": 0.2038545161485672,
+      "learning_rate": 6.865762593066513e-05,
+      "loss": 1.25,
+      "step": 336
+    },
+    {
+      "epoch": 0.042842954193317705,
+      "grad_norm": 0.20035366714000702,
+      "learning_rate": 6.810773145842653e-05,
+      "loss": 1.2243,
+      "step": 337
+    },
+    {
+      "epoch": 0.042970084621191056,
+      "grad_norm": 0.20092110335826874,
+      "learning_rate": 6.755890846939454e-05,
+      "loss": 1.2279,
+      "step": 338
+    },
+    {
+      "epoch": 0.0430972150490644,
+      "grad_norm": 0.20261281728744507,
+      "learning_rate": 6.701117540235204e-05,
+      "loss": 1.2418,
+      "step": 339
+    },
+    {
+      "epoch": 0.043224345476937744,
+      "grad_norm": 0.20916980504989624,
+      "learning_rate": 6.646455065946386e-05,
+      "loss": 1.2205,
+      "step": 340
+    },
+    {
+      "epoch": 0.043351475904811095,
+      "grad_norm": 0.1868792027235031,
+      "learning_rate": 6.591905260565852e-05,
+      "loss": 1.2149,
+      "step": 341
+    },
+    {
+      "epoch": 0.04347860633268444,
+      "grad_norm": 0.19856908917427063,
+      "learning_rate": 6.537469956801128e-05,
+      "loss": 1.2518,
+      "step": 342
+    },
+    {
+      "epoch": 0.04360573676055778,
+      "grad_norm": 0.19585344195365906,
+      "learning_rate": 6.483150983512823e-05,
+      "loss": 1.2202,
+      "step": 343
+    },
+    {
+      "epoch": 0.04373286718843113,
+      "grad_norm": 0.19705970585346222,
+      "learning_rate": 6.428950165653204e-05,
+      "loss": 1.2701,
+      "step": 344
+    },
+    {
+      "epoch": 0.04385999761630448,
+      "grad_norm": 0.19830965995788574,
+      "learning_rate": 6.374869324204869e-05,
+      "loss": 1.2132,
+      "step": 345
+    },
+    {
+      "epoch": 0.04398712804417782,
+      "grad_norm": 0.20360921323299408,
+      "learning_rate": 6.320910276119576e-05,
+      "loss": 1.1979,
+      "step": 346
+    },
+    {
+      "epoch": 0.04411425847205117,
+      "grad_norm": 0.20261693000793457,
+      "learning_rate": 6.267074834257199e-05,
+      "loss": 1.2231,
+      "step": 347
+    },
+    {
+      "epoch": 0.044241388899924516,
+      "grad_norm": 0.19419489800930023,
+      "learning_rate": 6.213364807324818e-05,
+      "loss": 1.1575,
+      "step": 348
+    },
+    {
+      "epoch": 0.04436851932779786,
+      "grad_norm": 0.2025313675403595,
+      "learning_rate": 6.15978199981595e-05,
+      "loss": 1.2566,
+      "step": 349
+    },
+    {
+      "epoch": 0.04449564975567121,
+      "grad_norm": 0.19754880666732788,
+      "learning_rate": 6.106328211949928e-05,
+      "loss": 1.2821,
+      "step": 350
+    },
+    {
+      "epoch": 0.044622780183544555,
+      "grad_norm": 0.20343464612960815,
+      "learning_rate": 6.053005239611418e-05,
+      "loss": 1.2204,
+      "step": 351
+    },
+    {
+      "epoch": 0.0447499106114179,
+      "grad_norm": 0.20527192950248718,
+      "learning_rate": 5.999814874290084e-05,
+      "loss": 1.2513,
+      "step": 352
+    },
+    {
+      "epoch": 0.04487704103929125,
+      "grad_norm": 0.19321362674236298,
+      "learning_rate": 5.946758903020393e-05,
+      "loss": 1.2466,
+      "step": 353
+    },
+    {
+      "epoch": 0.045004171467164594,
+      "grad_norm": 0.20470896363258362,
+      "learning_rate": 5.893839108321584e-05,
+      "loss": 1.2846,
+      "step": 354
+    },
+    {
+      "epoch": 0.04513130189503794,
+      "grad_norm": 0.19084323942661285,
+      "learning_rate": 5.841057268137771e-05,
+      "loss": 1.2126,
+      "step": 355
+    },
+    {
+      "epoch": 0.04525843232291129,
+      "grad_norm": 0.19587008655071259,
+      "learning_rate": 5.7884151557782305e-05,
+      "loss": 1.1983,
+      "step": 356
+    },
+    {
+      "epoch": 0.04538556275078463,
+      "grad_norm": 0.20390859246253967,
+      "learning_rate": 5.735914539857798e-05,
+      "loss": 1.1981,
+      "step": 357
+    },
+    {
+      "epoch": 0.04551269317865798,
+      "grad_norm": 0.19584935903549194,
+      "learning_rate": 5.68355718423746e-05,
+      "loss": 1.2039,
+      "step": 358
+    },
+    {
+      "epoch": 0.04563982360653133,
+      "grad_norm": 0.19530071318149567,
+      "learning_rate": 5.6313448479650946e-05,
+      "loss": 1.236,
+      "step": 359
+    },
+    {
+      "epoch": 0.04576695403440467,
+      "grad_norm": 0.19659969210624695,
+      "learning_rate": 5.579279285216369e-05,
+      "loss": 1.1936,
+      "step": 360
+    },
+    {
+      "epoch": 0.045894084462278016,
+      "grad_norm": 0.1933298110961914,
+      "learning_rate": 5.527362245235805e-05,
+      "loss": 1.227,
+      "step": 361
+    },
+    {
+      "epoch": 0.04602121489015137,
+      "grad_norm": 0.20280398428440094,
+      "learning_rate": 5.475595472278024e-05,
+      "loss": 1.2644,
+      "step": 362
+    },
+    {
+      "epoch": 0.04614834531802471,
+      "grad_norm": 0.1918189376592636,
+      "learning_rate": 5.4239807055491135e-05,
+      "loss": 1.1495,
+      "step": 363
+    },
+    {
+      "epoch": 0.046275475745898055,
+      "grad_norm": 0.2044762223958969,
+      "learning_rate": 5.372519679148227e-05,
+      "loss": 1.241,
+      "step": 364
+    },
+    {
+      "epoch": 0.046402606173771406,
+      "grad_norm": 0.1972542256116867,
+      "learning_rate": 5.321214122009306e-05,
+      "loss": 1.1419,
+      "step": 365
+    },
+    {
+      "epoch": 0.04652973660164475,
+      "grad_norm": 0.20039339363574982,
+      "learning_rate": 5.270065757843e-05,
+      "loss": 1.2718,
+      "step": 366
+    },
+    {
+      "epoch": 0.046656867029518094,
+      "grad_norm": 0.1938110589981079,
+      "learning_rate": 5.219076305078749e-05,
+      "loss": 1.1947,
+      "step": 367
+    },
+    {
+      "epoch": 0.046783997457391445,
+      "grad_norm": 0.20640990138053894,
+      "learning_rate": 5.168247476807053e-05,
+      "loss": 1.1526,
+      "step": 368
+    },
+    {
+      "epoch": 0.04691112788526479,
+      "grad_norm": 0.198054239153862,
+      "learning_rate": 5.11758098072193e-05,
+      "loss": 1.1965,
+      "step": 369
+    },
+    {
+      "epoch": 0.04703825831313813,
+      "grad_norm": 0.19604484736919403,
+      "learning_rate": 5.067078519063514e-05,
+      "loss": 1.2568,
+      "step": 370
+    },
+    {
+      "epoch": 0.047165388741011484,
+      "grad_norm": 0.2095029056072235,
+      "learning_rate": 5.016741788560889e-05,
+      "loss": 1.2822,
+      "step": 371
+    },
+    {
+      "epoch": 0.04729251916888483,
+      "grad_norm": 0.20356985926628113,
+      "learning_rate": 4.9665724803750756e-05,
+      "loss": 1.1434,
+      "step": 372
+    },
+    {
+      "epoch": 0.04741964959675817,
+      "grad_norm": 0.19989654421806335,
+      "learning_rate": 4.9165722800422096e-05,
+      "loss": 1.2767,
+      "step": 373
+    },
+    {
+      "epoch": 0.04754678002463152,
+      "grad_norm": 0.19582509994506836,
+      "learning_rate": 4.86674286741693e-05,
+      "loss": 1.2693,
+      "step": 374
+    },
+    {
+      "epoch": 0.04767391045250487,
+      "grad_norm": 0.1962389498949051,
+      "learning_rate": 4.8170859166159144e-05,
+      "loss": 1.3266,
+      "step": 375
+    },
+    {
+      "epoch": 0.04780104088037821,
+      "grad_norm": 0.2056453377008438,
+      "learning_rate": 4.7676030959616526e-05,
+      "loss": 1.3004,
+      "step": 376
+    },
+    {
+      "epoch": 0.04792817130825156,
+      "grad_norm": 0.19587452709674835,
+      "learning_rate": 4.71829606792639e-05,
+      "loss": 1.2154,
+      "step": 377
+    },
+    {
+      "epoch": 0.048055301736124906,
+      "grad_norm": 0.19662117958068848,
+      "learning_rate": 4.669166489076283e-05,
+      "loss": 1.2434,
+      "step": 378
+    },
+    {
+      "epoch": 0.04818243216399825,
+      "grad_norm": 0.19508899748325348,
+      "learning_rate": 4.620216010015724e-05,
+      "loss": 1.2319,
+      "step": 379
+    },
+    {
+      "epoch": 0.0483095625918716,
+      "grad_norm": 0.19653861224651337,
+      "learning_rate": 4.571446275331903e-05,
+      "loss": 1.2006,
+      "step": 380
+    },
+    {
+      "epoch": 0.048436693019744945,
+      "grad_norm": 0.1971856951713562,
+      "learning_rate": 4.5228589235395436e-05,
+      "loss": 1.2937,
+      "step": 381
+    },
+    {
+      "epoch": 0.04856382344761829,
+      "grad_norm": 0.2165059596300125,
+      "learning_rate": 4.4744555870258694e-05,
+      "loss": 1.2722,
+      "step": 382
+    },
+    {
+      "epoch": 0.04869095387549164,
+      "grad_norm": 0.20020557940006256,
+      "learning_rate": 4.4262378919957413e-05,
+      "loss": 1.1947,
+      "step": 383
+    },
+    {
+      "epoch": 0.048818084303364984,
+      "grad_norm": 0.19455446302890778,
+      "learning_rate": 4.378207458417035e-05,
+      "loss": 1.1956,
+      "step": 384
+    },
+    {
+      "epoch": 0.04894521473123833,
+      "grad_norm": 0.202660471200943,
+      "learning_rate": 4.3303658999662086e-05,
+      "loss": 1.2553,
+      "step": 385
+    },
+    {
+      "epoch": 0.04907234515911168,
+      "grad_norm": 0.19681531190872192,
+      "learning_rate": 4.282714823974088e-05,
+      "loss": 1.2031,
+      "step": 386
+    },
+    {
+      "epoch": 0.04919947558698502,
+      "grad_norm": 0.20613734424114227,
+      "learning_rate": 4.2352558313718795e-05,
+      "loss": 1.2384,
+      "step": 387
+    },
+    {
+      "epoch": 0.04932660601485837,
+      "grad_norm": 0.1990024596452713,
+      "learning_rate": 4.1879905166373614e-05,
+      "loss": 1.2184,
+      "step": 388
+    },
+    {
+      "epoch": 0.04945373644273172,
+      "grad_norm": 0.21309691667556763,
+      "learning_rate": 4.140920467741325e-05,
+      "loss": 1.1853,
+      "step": 389
+    },
+    {
+      "epoch": 0.04958086687060506,
+      "grad_norm": 0.19488035142421722,
+      "learning_rate": 4.094047266094225e-05,
+      "loss": 1.1804,
+      "step": 390
+    },
+    {
+      "epoch": 0.049707997298478405,
+      "grad_norm": 0.19738849997520447,
+      "learning_rate": 4.047372486493054e-05,
+      "loss": 1.2534,
+      "step": 391
+    },
+    {
+      "epoch": 0.049835127726351756,
+      "grad_norm": 0.20008018612861633,
+      "learning_rate": 4.0008976970684176e-05,
+      "loss": 1.2723,
+      "step": 392
+    },
+    {
+      "epoch": 0.0499622581542251,
+      "grad_norm": 0.19521461427211761,
+      "learning_rate": 3.954624459231866e-05,
+      "loss": 1.1705,
+      "step": 393
+    },
+    {
+      "epoch": 0.050089388582098444,
+      "grad_norm": 0.20466111600399017,
+      "learning_rate": 3.908554327623425e-05,
+      "loss": 1.154,
+      "step": 394
+    },
+    {
+      "epoch": 0.050216519009971795,
+      "grad_norm": 0.2047969251871109,
+      "learning_rate": 3.8626888500593695e-05,
+      "loss": 1.2139,
+      "step": 395
+    },
+    {
+      "epoch": 0.05034364943784514,
+      "grad_norm": 0.1980600655078888,
+      "learning_rate": 3.817029567480228e-05,
+      "loss": 1.279,
+      "step": 396
+    },
+    {
+      "epoch": 0.05047077986571848,
+      "grad_norm": 0.20217813551425934,
+      "learning_rate": 3.771578013898996e-05,
+      "loss": 1.2561,
+      "step": 397
+    },
+    {
+      "epoch": 0.050597910293591834,
+      "grad_norm": 0.1985122561454773,
+      "learning_rate": 3.726335716349612e-05,
+      "loss": 1.2778,
+      "step": 398
+    },
+    {
+      "epoch": 0.05072504072146518,
+      "grad_norm": 0.19889195263385773,
+      "learning_rate": 3.681304194835641e-05,
+      "loss": 1.3225,
+      "step": 399
+    },
+    {
+      "epoch": 0.05085217114933852,
+      "grad_norm": 0.19213935732841492,
+      "learning_rate": 3.6364849622792266e-05,
+      "loss": 1.2308,
+      "step": 400
+    },
+    {
+      "epoch": 0.05085217114933852,
+      "eval_loss": 1.2253398895263672,
+      "eval_runtime": 1257.7786,
+      "eval_samples_per_second": 3.975,
+      "eval_steps_per_second": 0.994,
+      "step": 400
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.145600090918093e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null