Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c2f4df89c706bd795cf61cd880b8c847603a75f25b065b1e0fc555b0bbde2cb
 size 144805440

 version https://git-lfs.github.com/spec/v1
+oid sha256:d754ba46403471642fb3eec991a13b30626e411f350e52e7b0ba85dcbe5df5ad
 size 144805440

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b9c2881cf3ec93417fc2d54ac97ba9a7c9dbe98604cb5c93c40f55f7e7463a5
 size 74291604

 version https://git-lfs.github.com/spec/v1
+oid sha256:10a96b370d1bef983712eb142fc43f6f0f1d0870efa1c253d6c733799a46f741
 size 74291604

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc138860ab105b276c76a34cb2106056fc94aa4b282c5d34dee59a18265e2ee0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e9068a537e5910a69bd8d8437185b23968169fbf47b4ea54829c00092157a9c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bf32963594d89a0ceccb5a8224effbfcb1f1a9bd636238e18e9c6df35eff9ac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:640f62f8d5c8ab2ac2c8a2097dccc04a34c4b29def0309df243ff8ccd6710772
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2906723022460938,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.01271304278733463,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,714 @@
       "eval_samples_per_second": 3.973,
       "eval_steps_per_second": 0.993,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -751,7 +1459,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0364000227295232e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2551084756851196,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.02542608557466926,
   "eval_steps": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.973,
       "eval_steps_per_second": 0.993,
       "step": 100
+    },
+    {
+      "epoch": 0.012840173215207978,
+      "grad_norm": 0.1781390905380249,
+      "learning_rate": 0.0001864086844042209,
+      "loss": 1.3021,
+      "step": 101
+    },
+    {
+      "epoch": 0.012967303643081324,
+      "grad_norm": 0.17100100219249725,
+      "learning_rate": 0.00018611548212485647,
+      "loss": 1.2574,
+      "step": 102
+    },
+    {
+      "epoch": 0.01309443407095467,
+      "grad_norm": 0.18398095667362213,
+      "learning_rate": 0.00018581938662725632,
+      "loss": 1.2839,
+      "step": 103
+    },
+    {
+      "epoch": 0.013221564498828017,
+      "grad_norm": 0.18981115520000458,
+      "learning_rate": 0.00018552040785932845,
+      "loss": 1.3149,
+      "step": 104
+    },
+    {
+      "epoch": 0.013348694926701363,
+      "grad_norm": 0.18872378766536713,
+      "learning_rate": 0.00018521855586584995,
+      "loss": 1.279,
+      "step": 105
+    },
+    {
+      "epoch": 0.013475825354574708,
+      "grad_norm": 0.1824631690979004,
+      "learning_rate": 0.00018491384078812959,
+      "loss": 1.2743,
+      "step": 106
+    },
+    {
+      "epoch": 0.013602955782448056,
+      "grad_norm": 0.1971443146467209,
+      "learning_rate": 0.000184606272863667,
+      "loss": 1.3365,
+      "step": 107
+    },
+    {
+      "epoch": 0.013730086210321402,
+      "grad_norm": 0.19964328408241272,
+      "learning_rate": 0.00018429586242580884,
+      "loss": 1.3184,
+      "step": 108
+    },
+    {
+      "epoch": 0.013857216638194747,
+      "grad_norm": 0.17624543607234955,
+      "learning_rate": 0.00018398261990340152,
+      "loss": 1.2755,
+      "step": 109
+    },
+    {
+      "epoch": 0.013984347066068095,
+      "grad_norm": 0.18599238991737366,
+      "learning_rate": 0.00018366655582044094,
+      "loss": 1.3025,
+      "step": 110
+    },
+    {
+      "epoch": 0.01411147749394144,
+      "grad_norm": 0.19051305949687958,
+      "learning_rate": 0.00018334768079571884,
+      "loss": 1.351,
+      "step": 111
+    },
+    {
+      "epoch": 0.014238607921814786,
+      "grad_norm": 0.1858106255531311,
+      "learning_rate": 0.00018302600554246601,
+      "loss": 1.2386,
+      "step": 112
+    },
+    {
+      "epoch": 0.014365738349688134,
+      "grad_norm": 0.17598244547843933,
+      "learning_rate": 0.00018270154086799239,
+      "loss": 1.2687,
+      "step": 113
+    },
+    {
+      "epoch": 0.01449286877756148,
+      "grad_norm": 0.18105947971343994,
+      "learning_rate": 0.00018237429767332405,
+      "loss": 1.2843,
+      "step": 114
+    },
+    {
+      "epoch": 0.014619999205434825,
+      "grad_norm": 0.18796177208423615,
+      "learning_rate": 0.00018204428695283687,
+      "loss": 1.2999,
+      "step": 115
+    },
+    {
+      "epoch": 0.014747129633308173,
+      "grad_norm": 0.18702763319015503,
+      "learning_rate": 0.00018171151979388714,
+      "loss": 1.2391,
+      "step": 116
+    },
+    {
+      "epoch": 0.014874260061181518,
+      "grad_norm": 0.17469799518585205,
+      "learning_rate": 0.00018137600737643913,
+      "loss": 1.2915,
+      "step": 117
+    },
+    {
+      "epoch": 0.015001390489054864,
+      "grad_norm": 0.1871766746044159,
+      "learning_rate": 0.00018103776097268942,
+      "loss": 1.2429,
+      "step": 118
+    },
+    {
+      "epoch": 0.015128520916928212,
+      "grad_norm": 0.18426093459129333,
+      "learning_rate": 0.00018069679194668826,
+      "loss": 1.2678,
+      "step": 119
+    },
+    {
+      "epoch": 0.015255651344801557,
+      "grad_norm": 0.1830713450908661,
+      "learning_rate": 0.0001803531117539577,
+      "loss": 1.3231,
+      "step": 120
+    },
+    {
+      "epoch": 0.015382781772674903,
+      "grad_norm": 0.19156108796596527,
+      "learning_rate": 0.00018000673194110668,
+      "loss": 1.3426,
+      "step": 121
+    },
+    {
+      "epoch": 0.01550991220054825,
+      "grad_norm": 0.18232569098472595,
+      "learning_rate": 0.00017965766414544326,
+      "loss": 1.2227,
+      "step": 122
+    },
+    {
+      "epoch": 0.015637042628421596,
+      "grad_norm": 0.18696987628936768,
+      "learning_rate": 0.00017930592009458352,
+      "loss": 1.2933,
+      "step": 123
+    },
+    {
+      "epoch": 0.015764173056294944,
+      "grad_norm": 0.18148070573806763,
+      "learning_rate": 0.00017895151160605757,
+      "loss": 1.3598,
+      "step": 124
+    },
+    {
+      "epoch": 0.015891303484168288,
+      "grad_norm": 0.1859319657087326,
+      "learning_rate": 0.00017859445058691247,
+      "loss": 1.2688,
+      "step": 125
+    },
+    {
+      "epoch": 0.016018433912041635,
+      "grad_norm": 0.18133966624736786,
+      "learning_rate": 0.00017823474903331233,
+      "loss": 1.2912,
+      "step": 126
+    },
+    {
+      "epoch": 0.016145564339914983,
+      "grad_norm": 0.16695751249790192,
+      "learning_rate": 0.0001778724190301351,
+      "loss": 1.2772,
+      "step": 127
+    },
+    {
+      "epoch": 0.016272694767788327,
+      "grad_norm": 0.17694084346294403,
+      "learning_rate": 0.0001775074727505667,
+      "loss": 1.2998,
+      "step": 128
+    },
+    {
+      "epoch": 0.016399825195661674,
+      "grad_norm": 0.18545518815517426,
+      "learning_rate": 0.0001771399224556919,
+      "loss": 1.2996,
+      "step": 129
+    },
+    {
+      "epoch": 0.01652695562353502,
+      "grad_norm": 0.1763446033000946,
+      "learning_rate": 0.00017676978049408263,
+      "loss": 1.2942,
+      "step": 130
+    },
+    {
+      "epoch": 0.016654086051408366,
+      "grad_norm": 0.1751178801059723,
+      "learning_rate": 0.00017639705930138272,
+      "loss": 1.2491,
+      "step": 131
+    },
+    {
+      "epoch": 0.016781216479281713,
+      "grad_norm": 0.17463481426239014,
+      "learning_rate": 0.00017602177139989044,
+      "loss": 1.3015,
+      "step": 132
+    },
+    {
+      "epoch": 0.01690834690715506,
+      "grad_norm": 0.1884208619594574,
+      "learning_rate": 0.0001756439293981377,
+      "loss": 1.2555,
+      "step": 133
+    },
+    {
+      "epoch": 0.017035477335028405,
+      "grad_norm": 0.1824871301651001,
+      "learning_rate": 0.00017526354599046635,
+      "loss": 1.3321,
+      "step": 134
+    },
+    {
+      "epoch": 0.017162607762901752,
+      "grad_norm": 0.17852945625782013,
+      "learning_rate": 0.00017488063395660177,
+      "loss": 1.2134,
+      "step": 135
+    },
+    {
+      "epoch": 0.0172897381907751,
+      "grad_norm": 0.17903351783752441,
+      "learning_rate": 0.00017449520616122344,
+      "loss": 1.202,
+      "step": 136
+    },
+    {
+      "epoch": 0.017416868618648444,
+      "grad_norm": 0.19624289870262146,
+      "learning_rate": 0.00017410727555353282,
+      "loss": 1.2983,
+      "step": 137
+    },
+    {
+      "epoch": 0.01754399904652179,
+      "grad_norm": 0.20271572470664978,
+      "learning_rate": 0.00017371685516681825,
+      "loss": 1.331,
+      "step": 138
+    },
+    {
+      "epoch": 0.01767112947439514,
+      "grad_norm": 0.19160455465316772,
+      "learning_rate": 0.00017332395811801707,
+      "loss": 1.2325,
+      "step": 139
+    },
+    {
+      "epoch": 0.017798259902268482,
+      "grad_norm": 0.19286282360553741,
+      "learning_rate": 0.00017292859760727493,
+      "loss": 1.3632,
+      "step": 140
+    },
+    {
+      "epoch": 0.01792539033014183,
+      "grad_norm": 0.18525561690330505,
+      "learning_rate": 0.00017253078691750227,
+      "loss": 1.302,
+      "step": 141
+    },
+    {
+      "epoch": 0.018052520758015177,
+      "grad_norm": 0.17999610304832458,
+      "learning_rate": 0.00017213053941392818,
+      "loss": 1.2617,
+      "step": 142
+    },
+    {
+      "epoch": 0.01817965118588852,
+      "grad_norm": 0.1817435920238495,
+      "learning_rate": 0.00017172786854365116,
+      "loss": 1.285,
+      "step": 143
+    },
+    {
+      "epoch": 0.01830678161376187,
+      "grad_norm": 0.18393941223621368,
+      "learning_rate": 0.00017132278783518756,
+      "loss": 1.2033,
+      "step": 144
+    },
+    {
+      "epoch": 0.018433912041635216,
+      "grad_norm": 0.18280182778835297,
+      "learning_rate": 0.00017091531089801694,
+      "loss": 1.2454,
+      "step": 145
+    },
+    {
+      "epoch": 0.01856104246950856,
+      "grad_norm": 0.17269238829612732,
+      "learning_rate": 0.00017050545142212483,
+      "loss": 1.2137,
+      "step": 146
+    },
+    {
+      "epoch": 0.018688172897381908,
+      "grad_norm": 0.18515561521053314,
+      "learning_rate": 0.00017009322317754278,
+      "loss": 1.2876,
+      "step": 147
+    },
+    {
+      "epoch": 0.018815303325255255,
+      "grad_norm": 0.18649280071258545,
+      "learning_rate": 0.0001696786400138859,
+      "loss": 1.3279,
+      "step": 148
+    },
+    {
+      "epoch": 0.0189424337531286,
+      "grad_norm": 0.18008284270763397,
+      "learning_rate": 0.00016926171585988727,
+      "loss": 1.1943,
+      "step": 149
+    },
+    {
+      "epoch": 0.019069564181001947,
+      "grad_norm": 0.18855896592140198,
+      "learning_rate": 0.00016884246472293016,
+      "loss": 1.3458,
+      "step": 150
+    },
+    {
+      "epoch": 0.019196694608875294,
+      "grad_norm": 0.18721222877502441,
+      "learning_rate": 0.00016842090068857742,
+      "loss": 1.205,
+      "step": 151
+    },
+    {
+      "epoch": 0.019323825036748638,
+      "grad_norm": 0.18609726428985596,
+      "learning_rate": 0.00016799703792009827,
+      "loss": 1.3147,
+      "step": 152
+    },
+    {
+      "epoch": 0.019450955464621986,
+      "grad_norm": 0.18827542662620544,
+      "learning_rate": 0.00016757089065799226,
+      "loss": 1.2053,
+      "step": 153
+    },
+    {
+      "epoch": 0.019578085892495333,
+      "grad_norm": 0.19211921095848083,
+      "learning_rate": 0.00016714247321951106,
+      "loss": 1.2881,
+      "step": 154
+    },
+    {
+      "epoch": 0.019705216320368677,
+      "grad_norm": 0.1911146342754364,
+      "learning_rate": 0.0001667117999981774,
+      "loss": 1.2841,
+      "step": 155
+    },
+    {
+      "epoch": 0.019832346748242025,
+      "grad_norm": 0.1876746416091919,
+      "learning_rate": 0.00016627888546330138,
+      "loss": 1.2795,
+      "step": 156
+    },
+    {
+      "epoch": 0.019959477176115372,
+      "grad_norm": 0.18275220692157745,
+      "learning_rate": 0.00016584374415949443,
+      "loss": 1.2646,
+      "step": 157
+    },
+    {
+      "epoch": 0.020086607603988716,
+      "grad_norm": 0.19240595400333405,
+      "learning_rate": 0.0001654063907061807,
+      "loss": 1.2286,
+      "step": 158
+    },
+    {
+      "epoch": 0.020213738031862064,
+      "grad_norm": 0.17621144652366638,
+      "learning_rate": 0.00016496683979710575,
+      "loss": 1.2623,
+      "step": 159
+    },
+    {
+      "epoch": 0.02034086845973541,
+      "grad_norm": 0.18566247820854187,
+      "learning_rate": 0.000164525106199843,
+      "loss": 1.2915,
+      "step": 160
+    },
+    {
+      "epoch": 0.020467998887608755,
+      "grad_norm": 0.19843867421150208,
+      "learning_rate": 0.00016408120475529763,
+      "loss": 1.1703,
+      "step": 161
+    },
+    {
+      "epoch": 0.020595129315482102,
+      "grad_norm": 0.20230089128017426,
+      "learning_rate": 0.00016363515037720773,
+      "loss": 1.274,
+      "step": 162
+    },
+    {
+      "epoch": 0.02072225974335545,
+      "grad_norm": 0.1874382644891739,
+      "learning_rate": 0.00016318695805164359,
+      "loss": 1.267,
+      "step": 163
+    },
+    {
+      "epoch": 0.020849390171228794,
+      "grad_norm": 0.19301468133926392,
+      "learning_rate": 0.0001627366428365039,
+      "loss": 1.3385,
+      "step": 164
+    },
+    {
+      "epoch": 0.02097652059910214,
+      "grad_norm": 0.1960678994655609,
+      "learning_rate": 0.00016228421986101005,
+      "loss": 1.2469,
+      "step": 165
+    },
+    {
+      "epoch": 0.02110365102697549,
+      "grad_norm": 0.2149035483598709,
+      "learning_rate": 0.00016182970432519772,
+      "loss": 1.2695,
+      "step": 166
+    },
+    {
+      "epoch": 0.021230781454848833,
+      "grad_norm": 0.1928316354751587,
+      "learning_rate": 0.00016137311149940633,
+      "loss": 1.2581,
+      "step": 167
+    },
+    {
+      "epoch": 0.02135791188272218,
+      "grad_norm": 0.18403369188308716,
+      "learning_rate": 0.0001609144567237658,
+      "loss": 1.2872,
+      "step": 168
+    },
+    {
+      "epoch": 0.021485042310595528,
+      "grad_norm": 0.18688054382801056,
+      "learning_rate": 0.00016045375540768136,
+      "loss": 1.2762,
+      "step": 169
+    },
+    {
+      "epoch": 0.021612172738468872,
+      "grad_norm": 0.19875864684581757,
+      "learning_rate": 0.00015999102302931585,
+      "loss": 1.2773,
+      "step": 170
+    },
+    {
+      "epoch": 0.02173930316634222,
+      "grad_norm": 0.19474861025810242,
+      "learning_rate": 0.0001595262751350695,
+      "loss": 1.2329,
+      "step": 171
+    },
+    {
+      "epoch": 0.021866433594215567,
+      "grad_norm": 0.1946505606174469,
+      "learning_rate": 0.00015905952733905775,
+      "loss": 1.1726,
+      "step": 172
+    },
+    {
+      "epoch": 0.02199356402208891,
+      "grad_norm": 0.18479324877262115,
+      "learning_rate": 0.00015859079532258677,
+      "loss": 1.3177,
+      "step": 173
+    },
+    {
+      "epoch": 0.022120694449962258,
+      "grad_norm": 0.19268646836280823,
+      "learning_rate": 0.00015812009483362642,
+      "loss": 1.2721,
+      "step": 174
+    },
+    {
+      "epoch": 0.022247824877835606,
+      "grad_norm": 0.18371957540512085,
+      "learning_rate": 0.0001576474416862812,
+      "loss": 1.3083,
+      "step": 175
+    },
+    {
+      "epoch": 0.02237495530570895,
+      "grad_norm": 0.1987624615430832,
+      "learning_rate": 0.00015717285176025913,
+      "loss": 1.2582,
+      "step": 176
+    },
+    {
+      "epoch": 0.022502085733582297,
+      "grad_norm": 0.19360652565956116,
+      "learning_rate": 0.00015669634100033797,
+      "loss": 1.2597,
+      "step": 177
+    },
+    {
+      "epoch": 0.022629216161455645,
+      "grad_norm": 0.1875244826078415,
+      "learning_rate": 0.00015621792541582966,
+      "loss": 1.2637,
+      "step": 178
+    },
+    {
+      "epoch": 0.02275634658932899,
+      "grad_norm": 0.19594229757785797,
+      "learning_rate": 0.00015573762108004262,
+      "loss": 1.2907,
+      "step": 179
+    },
+    {
+      "epoch": 0.022883477017202336,
+      "grad_norm": 0.1935066133737564,
+      "learning_rate": 0.00015525544412974132,
+      "loss": 1.2446,
+      "step": 180
+    },
+    {
+      "epoch": 0.023010607445075684,
+      "grad_norm": 0.19178606569766998,
+      "learning_rate": 0.0001547714107646046,
+      "loss": 1.2644,
+      "step": 181
+    },
+    {
+      "epoch": 0.023137737872949028,
+      "grad_norm": 0.18824580311775208,
+      "learning_rate": 0.00015428553724668103,
+      "loss": 1.2592,
+      "step": 182
+    },
+    {
+      "epoch": 0.023264868300822375,
+      "grad_norm": 0.1857818067073822,
+      "learning_rate": 0.00015379783989984277,
+      "loss": 1.2547,
+      "step": 183
+    },
+    {
+      "epoch": 0.023391998728695722,
+      "grad_norm": 0.18491147458553314,
+      "learning_rate": 0.00015330833510923718,
+      "loss": 1.3073,
+      "step": 184
+    },
+    {
+      "epoch": 0.023519129156569066,
+      "grad_norm": 0.19134363532066345,
+      "learning_rate": 0.00015281703932073612,
+      "loss": 1.2456,
+      "step": 185
+    },
+    {
+      "epoch": 0.023646259584442414,
+      "grad_norm": 0.18579505383968353,
+      "learning_rate": 0.0001523239690403835,
+      "loss": 1.2626,
+      "step": 186
+    },
+    {
+      "epoch": 0.02377339001231576,
+      "grad_norm": 0.18687140941619873,
+      "learning_rate": 0.0001518291408338409,
+      "loss": 1.2795,
+      "step": 187
+    },
+    {
+      "epoch": 0.023900520440189105,
+      "grad_norm": 0.1869836449623108,
+      "learning_rate": 0.00015133257132583073,
+      "loss": 1.2111,
+      "step": 188
+    },
+    {
+      "epoch": 0.024027650868062453,
+      "grad_norm": 0.18433886766433716,
+      "learning_rate": 0.00015083427719957793,
+      "loss": 1.1969,
+      "step": 189
+    },
+    {
+      "epoch": 0.0241547812959358,
+      "grad_norm": 0.19012001156806946,
+      "learning_rate": 0.0001503342751962493,
+      "loss": 1.2973,
+      "step": 190
+    },
+    {
+      "epoch": 0.024281911723809144,
+      "grad_norm": 0.18975861370563507,
+      "learning_rate": 0.00014983258211439117,
+      "loss": 1.2964,
+      "step": 191
+    },
+    {
+      "epoch": 0.024409042151682492,
+      "grad_norm": 0.17685554921627045,
+      "learning_rate": 0.0001493292148093649,
+      "loss": 1.2763,
+      "step": 192
+    },
+    {
+      "epoch": 0.02453617257955584,
+      "grad_norm": 0.19333194196224213,
+      "learning_rate": 0.00014882419019278075,
+      "loss": 1.3203,
+      "step": 193
+    },
+    {
+      "epoch": 0.024663303007429183,
+      "grad_norm": 0.19778768718242645,
+      "learning_rate": 0.00014831752523192948,
+      "loss": 1.3204,
+      "step": 194
+    },
+    {
+      "epoch": 0.02479043343530253,
+      "grad_norm": 0.1869363635778427,
+      "learning_rate": 0.00014780923694921255,
+      "loss": 1.2258,
+      "step": 195
+    },
+    {
+      "epoch": 0.024917563863175878,
+      "grad_norm": 0.17671674489974976,
+      "learning_rate": 0.00014729934242157004,
+      "loss": 1.1667,
+      "step": 196
+    },
+    {
+      "epoch": 0.025044694291049222,
+      "grad_norm": 0.1893490105867386,
+      "learning_rate": 0.00014678785877990697,
+      "loss": 1.3572,
+      "step": 197
+    },
+    {
+      "epoch": 0.02517182471892257,
+      "grad_norm": 0.19606593251228333,
+      "learning_rate": 0.00014627480320851774,
+      "loss": 1.2507,
+      "step": 198
+    },
+    {
+      "epoch": 0.025298955146795917,
+      "grad_norm": 0.20087891817092896,
+      "learning_rate": 0.00014576019294450888,
+      "loss": 1.3149,
+      "step": 199
+    },
+    {
+      "epoch": 0.02542608557466926,
+      "grad_norm": 0.1857730895280838,
+      "learning_rate": 0.00014524404527721977,
+      "loss": 1.2893,
+      "step": 200
+    },
+    {
+      "epoch": 0.02542608557466926,
+      "eval_loss": 1.2551084756851196,
+      "eval_runtime": 1258.1994,
+      "eval_samples_per_second": 3.974,
+      "eval_steps_per_second": 0.993,
+      "step": 200
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.0728000454590464e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null