Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da612bdb1acb3543b4af8f90e4bfc87baa942de156923e57670d64eb56f3c2f1
 size 289452128

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4c3e7f19619bd780dcd3a8b6f5dbd55d7b509733d64567061dca3e768f3f221
 size 289452128

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25c57c588ab0252c1b736b33a7340c87c53e821805d955c0a064e620bd215f39
 size 579097218

 version https://git-lfs.github.com/spec/v1
+oid sha256:642a915b1da364c25b3332245547ca9e7f6289514845e7a1763949bc55a21d33
 size 579097218

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcf25d4131a3f427115e36b649ec6ccb9d6250f1ccda2d87f43fe6b29c3f1d97
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:664ffae075a365888d62cfa9c3307fdff8086c47bcfd18081fc0f83415749d7b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8451731204986572,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.5063291139240507,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 29.882,
       "eval_steps_per_second": 14.971,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.087649161281536e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8411803841590881,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.6751054852320675,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 29.882,
       "eval_steps_per_second": 14.971,
       "step": 150
+    },
+    {
+      "epoch": 0.509704641350211,
+      "grad_norm": 0.09330438822507858,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 0.8577,
+      "step": 151
+    },
+    {
+      "epoch": 0.5130801687763713,
+      "grad_norm": 0.09590033441781998,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.869,
+      "step": 152
+    },
+    {
+      "epoch": 0.5164556962025316,
+      "grad_norm": 0.088727205991745,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 0.879,
+      "step": 153
+    },
+    {
+      "epoch": 0.5198312236286919,
+      "grad_norm": 0.09195546060800552,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 0.8552,
+      "step": 154
+    },
+    {
+      "epoch": 0.5232067510548524,
+      "grad_norm": 0.08427764475345612,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.8277,
+      "step": 155
+    },
+    {
+      "epoch": 0.5265822784810127,
+      "grad_norm": 0.08744396269321442,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.8976,
+      "step": 156
+    },
+    {
+      "epoch": 0.529957805907173,
+      "grad_norm": 0.08676258474588394,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 0.8411,
+      "step": 157
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.08297256380319595,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 0.8808,
+      "step": 158
+    },
+    {
+      "epoch": 0.5367088607594936,
+      "grad_norm": 0.0838639959692955,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 0.8363,
+      "step": 159
+    },
+    {
+      "epoch": 0.540084388185654,
+      "grad_norm": 0.08180323243141174,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.8883,
+      "step": 160
+    },
+    {
+      "epoch": 0.5434599156118144,
+      "grad_norm": 0.08074133098125458,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 0.8663,
+      "step": 161
+    },
+    {
+      "epoch": 0.5468354430379747,
+      "grad_norm": 0.08030061423778534,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 0.8244,
+      "step": 162
+    },
+    {
+      "epoch": 0.550210970464135,
+      "grad_norm": 0.08170463889837265,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 0.8731,
+      "step": 163
+    },
+    {
+      "epoch": 0.5535864978902953,
+      "grad_norm": 0.07635810226202011,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.8278,
+      "step": 164
+    },
+    {
+      "epoch": 0.5569620253164557,
+      "grad_norm": 0.07828008383512497,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 0.8729,
+      "step": 165
+    },
+    {
+      "epoch": 0.560337552742616,
+      "grad_norm": 0.07783474028110504,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 0.8833,
+      "step": 166
+    },
+    {
+      "epoch": 0.5637130801687764,
+      "grad_norm": 0.07819754630327225,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.8043,
+      "step": 167
+    },
+    {
+      "epoch": 0.5670886075949367,
+      "grad_norm": 0.08014006167650223,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.806,
+      "step": 168
+    },
+    {
+      "epoch": 0.570464135021097,
+      "grad_norm": 0.07956880331039429,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 0.8337,
+      "step": 169
+    },
+    {
+      "epoch": 0.5738396624472574,
+      "grad_norm": 0.08377084136009216,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.8492,
+      "step": 170
+    },
+    {
+      "epoch": 0.5772151898734177,
+      "grad_norm": 0.08549128472805023,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 0.8261,
+      "step": 171
+    },
+    {
+      "epoch": 0.580590717299578,
+      "grad_norm": 0.08281944692134857,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.8808,
+      "step": 172
+    },
+    {
+      "epoch": 0.5839662447257384,
+      "grad_norm": 0.08437817543745041,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.8875,
+      "step": 173
+    },
+    {
+      "epoch": 0.5873417721518988,
+      "grad_norm": 0.08079797029495239,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.8156,
+      "step": 174
+    },
+    {
+      "epoch": 0.5907172995780591,
+      "grad_norm": 0.07663010060787201,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 0.8254,
+      "step": 175
+    },
+    {
+      "epoch": 0.5940928270042194,
+      "grad_norm": 0.0759519711136818,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.7782,
+      "step": 176
+    },
+    {
+      "epoch": 0.5974683544303797,
+      "grad_norm": 0.07866054773330688,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 0.8416,
+      "step": 177
+    },
+    {
+      "epoch": 0.60084388185654,
+      "grad_norm": 0.07912636548280716,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.8541,
+      "step": 178
+    },
+    {
+      "epoch": 0.6042194092827005,
+      "grad_norm": 0.07650936394929886,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.8441,
+      "step": 179
+    },
+    {
+      "epoch": 0.6075949367088608,
+      "grad_norm": 0.08117911964654922,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.8574,
+      "step": 180
+    },
+    {
+      "epoch": 0.6109704641350211,
+      "grad_norm": 0.08503540605306625,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 0.8645,
+      "step": 181
+    },
+    {
+      "epoch": 0.6143459915611814,
+      "grad_norm": 0.08228060603141785,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.8464,
+      "step": 182
+    },
+    {
+      "epoch": 0.6177215189873417,
+      "grad_norm": 0.08500353246927261,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 0.8792,
+      "step": 183
+    },
+    {
+      "epoch": 0.6210970464135022,
+      "grad_norm": 0.08499061316251755,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 0.8615,
+      "step": 184
+    },
+    {
+      "epoch": 0.6244725738396625,
+      "grad_norm": 0.08179894089698792,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.8145,
+      "step": 185
+    },
+    {
+      "epoch": 0.6278481012658228,
+      "grad_norm": 0.08442049473524094,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 0.8551,
+      "step": 186
+    },
+    {
+      "epoch": 0.6312236286919831,
+      "grad_norm": 0.08597325533628464,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 0.842,
+      "step": 187
+    },
+    {
+      "epoch": 0.6345991561181434,
+      "grad_norm": 0.08621402084827423,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.8368,
+      "step": 188
+    },
+    {
+      "epoch": 0.6379746835443038,
+      "grad_norm": 0.08607050776481628,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 0.799,
+      "step": 189
+    },
+    {
+      "epoch": 0.6413502109704642,
+      "grad_norm": 0.08330494910478592,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.8317,
+      "step": 190
+    },
+    {
+      "epoch": 0.6447257383966245,
+      "grad_norm": 0.08824269473552704,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.8349,
+      "step": 191
+    },
+    {
+      "epoch": 0.6481012658227848,
+      "grad_norm": 0.0824505165219307,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.8046,
+      "step": 192
+    },
+    {
+      "epoch": 0.6514767932489451,
+      "grad_norm": 0.08412570506334305,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 0.833,
+      "step": 193
+    },
+    {
+      "epoch": 0.6548523206751055,
+      "grad_norm": 0.0915130153298378,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.874,
+      "step": 194
+    },
+    {
+      "epoch": 0.6582278481012658,
+      "grad_norm": 0.08780040591955185,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 0.8074,
+      "step": 195
+    },
+    {
+      "epoch": 0.6616033755274262,
+      "grad_norm": 0.08696349710226059,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 0.7858,
+      "step": 196
+    },
+    {
+      "epoch": 0.6649789029535865,
+      "grad_norm": 0.09163117408752441,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.8216,
+      "step": 197
+    },
+    {
+      "epoch": 0.6683544303797468,
+      "grad_norm": 0.09099559485912323,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 0.8724,
+      "step": 198
+    },
+    {
+      "epoch": 0.6717299578059072,
+      "grad_norm": 0.09470181167125702,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 0.8745,
+      "step": 199
+    },
+    {
+      "epoch": 0.6751054852320675,
+      "grad_norm": 0.11328919231891632,
+      "learning_rate": 0.0,
+      "loss": 0.9074,
+      "step": 200
+    },
+    {
+      "epoch": 0.6751054852320675,
+      "eval_loss": 0.8411803841590881,
+      "eval_runtime": 16.6908,
+      "eval_samples_per_second": 29.897,
+      "eval_steps_per_second": 14.978,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.775259614779802e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null