Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4834f3c035f84221a8754d3f1072da03a4472e2281a306a8600d4e9aa3a623ab
 size 224224

 version https://git-lfs.github.com/spec/v1
+oid sha256:54633b045fa3875795f5e3b282e7622e8d6fc1887620d7137e21adf0bdedcff9
 size 224224

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3f528934135eed76f0bd7f4d893156ba4a2c05e902932c54599590826fb44a5
 size 458278

 version https://git-lfs.github.com/spec/v1
+oid sha256:578416452e14be514a99d9286e95a5f5428c38a515851f529186ae1cfc1d81ef
 size 458278

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41cba3c6740b91479a26c28b90d7caa9f4b195401b129f72a5aea93a2222335a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:73ed1b5bb9723457092efbac7b1cf8e5382350824de5a4a887b2d3e65f0865fc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:191b4f74b8892fe464b31b446bc6f50032359ce22cb38236d5fdccf47f27920e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9abccd3ade815397c3a4e9cae178fd4a326a690915052661d8621974d592484a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.320969581604004,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.3125,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 329.202,
       "eval_steps_per_second": 82.706,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 11143226327040.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.320320129394531,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4166666666666667,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 329.202,
       "eval_steps_per_second": 82.706,
       "step": 150
+    },
+    {
+      "epoch": 0.3145833333333333,
+      "grad_norm": 0.37281185388565063,
+      "learning_rate": 2.583589473684211e-05,
+      "loss": 20.6498,
+      "step": 151
+    },
+    {
+      "epoch": 0.31666666666666665,
+      "grad_norm": 0.48406609892845154,
+      "learning_rate": 2.530863157894737e-05,
+      "loss": 20.6228,
+      "step": 152
+    },
+    {
+      "epoch": 0.31875,
+      "grad_norm": 0.39599233865737915,
+      "learning_rate": 2.4781368421052633e-05,
+      "loss": 20.638,
+      "step": 153
+    },
+    {
+      "epoch": 0.32083333333333336,
+      "grad_norm": 0.3863723874092102,
+      "learning_rate": 2.4254105263157896e-05,
+      "loss": 20.6314,
+      "step": 154
+    },
+    {
+      "epoch": 0.3229166666666667,
+      "grad_norm": 0.45764878392219543,
+      "learning_rate": 2.372684210526316e-05,
+      "loss": 20.6385,
+      "step": 155
+    },
+    {
+      "epoch": 0.325,
+      "grad_norm": 0.2955838441848755,
+      "learning_rate": 2.3199578947368422e-05,
+      "loss": 20.6307,
+      "step": 156
+    },
+    {
+      "epoch": 0.32708333333333334,
+      "grad_norm": 0.29116860032081604,
+      "learning_rate": 2.2672315789473688e-05,
+      "loss": 20.6354,
+      "step": 157
+    },
+    {
+      "epoch": 0.32916666666666666,
+      "grad_norm": 0.5557569861412048,
+      "learning_rate": 2.2145052631578948e-05,
+      "loss": 20.6324,
+      "step": 158
+    },
+    {
+      "epoch": 0.33125,
+      "grad_norm": 0.4483456313610077,
+      "learning_rate": 2.1617789473684214e-05,
+      "loss": 20.6235,
+      "step": 159
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.36201009154319763,
+      "learning_rate": 2.1090526315789473e-05,
+      "loss": 20.6236,
+      "step": 160
+    },
+    {
+      "epoch": 0.33541666666666664,
+      "grad_norm": 0.5224501490592957,
+      "learning_rate": 2.056326315789474e-05,
+      "loss": 20.6368,
+      "step": 161
+    },
+    {
+      "epoch": 0.3375,
+      "grad_norm": 0.4168667495250702,
+      "learning_rate": 2.0036000000000003e-05,
+      "loss": 20.6212,
+      "step": 162
+    },
+    {
+      "epoch": 0.33958333333333335,
+      "grad_norm": 0.422823041677475,
+      "learning_rate": 1.9508736842105266e-05,
+      "loss": 20.6554,
+      "step": 163
+    },
+    {
+      "epoch": 0.3416666666666667,
+      "grad_norm": 0.4180959165096283,
+      "learning_rate": 1.898147368421053e-05,
+      "loss": 20.632,
+      "step": 164
+    },
+    {
+      "epoch": 0.34375,
+      "grad_norm": 0.527528703212738,
+      "learning_rate": 1.8454210526315788e-05,
+      "loss": 20.6571,
+      "step": 165
+    },
+    {
+      "epoch": 0.3458333333333333,
+      "grad_norm": 0.6775944828987122,
+      "learning_rate": 1.7926947368421054e-05,
+      "loss": 20.6503,
+      "step": 166
+    },
+    {
+      "epoch": 0.34791666666666665,
+      "grad_norm": 0.45938971638679504,
+      "learning_rate": 1.7399684210526317e-05,
+      "loss": 20.6307,
+      "step": 167
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 0.581780731678009,
+      "learning_rate": 1.687242105263158e-05,
+      "loss": 20.5861,
+      "step": 168
+    },
+    {
+      "epoch": 0.35208333333333336,
+      "grad_norm": 0.46029457449913025,
+      "learning_rate": 1.6345157894736843e-05,
+      "loss": 20.6182,
+      "step": 169
+    },
+    {
+      "epoch": 0.3541666666666667,
+      "grad_norm": 0.4315298795700073,
+      "learning_rate": 1.5817894736842106e-05,
+      "loss": 20.617,
+      "step": 170
+    },
+    {
+      "epoch": 0.35625,
+      "grad_norm": 0.5391767621040344,
+      "learning_rate": 1.529063157894737e-05,
+      "loss": 20.6657,
+      "step": 171
+    },
+    {
+      "epoch": 0.35833333333333334,
+      "grad_norm": 0.7053831815719604,
+      "learning_rate": 1.4763368421052632e-05,
+      "loss": 20.6452,
+      "step": 172
+    },
+    {
+      "epoch": 0.36041666666666666,
+      "grad_norm": 0.3821829557418823,
+      "learning_rate": 1.4236105263157895e-05,
+      "loss": 20.6239,
+      "step": 173
+    },
+    {
+      "epoch": 0.3625,
+      "grad_norm": 0.41374513506889343,
+      "learning_rate": 1.370884210526316e-05,
+      "loss": 20.6571,
+      "step": 174
+    },
+    {
+      "epoch": 0.3645833333333333,
+      "grad_norm": 0.4170868694782257,
+      "learning_rate": 1.318157894736842e-05,
+      "loss": 20.6462,
+      "step": 175
+    },
+    {
+      "epoch": 0.36666666666666664,
+      "grad_norm": 0.338599294424057,
+      "learning_rate": 1.2654315789473685e-05,
+      "loss": 20.6363,
+      "step": 176
+    },
+    {
+      "epoch": 0.36875,
+      "grad_norm": 0.5105868577957153,
+      "learning_rate": 1.2127052631578948e-05,
+      "loss": 20.6538,
+      "step": 177
+    },
+    {
+      "epoch": 0.37083333333333335,
+      "grad_norm": 0.4912939965724945,
+      "learning_rate": 1.1599789473684211e-05,
+      "loss": 20.6367,
+      "step": 178
+    },
+    {
+      "epoch": 0.3729166666666667,
+      "grad_norm": 0.4326838552951813,
+      "learning_rate": 1.1072526315789474e-05,
+      "loss": 20.6502,
+      "step": 179
+    },
+    {
+      "epoch": 0.375,
+      "grad_norm": 0.4481593668460846,
+      "learning_rate": 1.0545263157894737e-05,
+      "loss": 20.6533,
+      "step": 180
+    },
+    {
+      "epoch": 0.3770833333333333,
+      "grad_norm": 0.5464130640029907,
+      "learning_rate": 1.0018000000000001e-05,
+      "loss": 20.6477,
+      "step": 181
+    },
+    {
+      "epoch": 0.37916666666666665,
+      "grad_norm": 0.42457976937294006,
+      "learning_rate": 9.490736842105264e-06,
+      "loss": 20.6598,
+      "step": 182
+    },
+    {
+      "epoch": 0.38125,
+      "grad_norm": 0.4240236282348633,
+      "learning_rate": 8.963473684210527e-06,
+      "loss": 20.6248,
+      "step": 183
+    },
+    {
+      "epoch": 0.38333333333333336,
+      "grad_norm": 0.5708866715431213,
+      "learning_rate": 8.43621052631579e-06,
+      "loss": 20.6368,
+      "step": 184
+    },
+    {
+      "epoch": 0.3854166666666667,
+      "grad_norm": 0.5419739484786987,
+      "learning_rate": 7.908947368421053e-06,
+      "loss": 20.6538,
+      "step": 185
+    },
+    {
+      "epoch": 0.3875,
+      "grad_norm": 0.5176281332969666,
+      "learning_rate": 7.381684210526316e-06,
+      "loss": 20.6526,
+      "step": 186
+    },
+    {
+      "epoch": 0.38958333333333334,
+      "grad_norm": 0.5326713919639587,
+      "learning_rate": 6.85442105263158e-06,
+      "loss": 20.6238,
+      "step": 187
+    },
+    {
+      "epoch": 0.39166666666666666,
+      "grad_norm": 0.7119190096855164,
+      "learning_rate": 6.3271578947368425e-06,
+      "loss": 20.6531,
+      "step": 188
+    },
+    {
+      "epoch": 0.39375,
+      "grad_norm": 0.5899550318717957,
+      "learning_rate": 5.7998947368421054e-06,
+      "loss": 20.6368,
+      "step": 189
+    },
+    {
+      "epoch": 0.3958333333333333,
+      "grad_norm": 0.5252934694290161,
+      "learning_rate": 5.272631578947368e-06,
+      "loss": 20.6664,
+      "step": 190
+    },
+    {
+      "epoch": 0.39791666666666664,
+      "grad_norm": 0.358653724193573,
+      "learning_rate": 4.745368421052632e-06,
+      "loss": 20.6579,
+      "step": 191
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.43110373616218567,
+      "learning_rate": 4.218105263157895e-06,
+      "loss": 20.6374,
+      "step": 192
+    },
+    {
+      "epoch": 0.40208333333333335,
+      "grad_norm": 0.45655491948127747,
+      "learning_rate": 3.690842105263158e-06,
+      "loss": 20.6361,
+      "step": 193
+    },
+    {
+      "epoch": 0.4041666666666667,
+      "grad_norm": 0.30297571420669556,
+      "learning_rate": 3.1635789473684213e-06,
+      "loss": 20.6386,
+      "step": 194
+    },
+    {
+      "epoch": 0.40625,
+      "grad_norm": 0.3377545177936554,
+      "learning_rate": 2.636315789473684e-06,
+      "loss": 20.6289,
+      "step": 195
+    },
+    {
+      "epoch": 0.4083333333333333,
+      "grad_norm": 0.5056354999542236,
+      "learning_rate": 2.1090526315789475e-06,
+      "loss": 20.6388,
+      "step": 196
+    },
+    {
+      "epoch": 0.41041666666666665,
+      "grad_norm": 0.6402460932731628,
+      "learning_rate": 1.5817894736842106e-06,
+      "loss": 20.6102,
+      "step": 197
+    },
+    {
+      "epoch": 0.4125,
+      "grad_norm": 0.6596386432647705,
+      "learning_rate": 1.0545263157894738e-06,
+      "loss": 20.6331,
+      "step": 198
+    },
+    {
+      "epoch": 0.41458333333333336,
+      "grad_norm": 0.6789427399635315,
+      "learning_rate": 5.272631578947369e-07,
+      "loss": 20.6559,
+      "step": 199
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 0.6088905334472656,
+      "learning_rate": 0.0,
+      "loss": 20.6439,
+      "step": 200
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "eval_loss": 10.320320129394531,
+      "eval_runtime": 0.6112,
+      "eval_samples_per_second": 332.15,
+      "eval_steps_per_second": 83.447,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 14821833572352.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null