Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:240e719bcd4fd491c2237e23258c8e39bca047342b7cc8bc8fdd9f49df12bdb3
 size 1157746040

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bf3f3a27cebadaa79096029d40b34edd398925aab694b8195b52fef3a2d83f7
 size 1157746040

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63b87cc3ac8b401765f602819af990d8bdfece651b5596734d6d6a8600928caf
 size 588699796

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ce79a681b2fbabc12ebb11e94940a9738f5b89d6019b63ffb38a222dcbb21a5
 size 588699796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a46691699b106230eff0c0cf4e82d75bf9752a9bea064a69a0f7866220b32ab
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfaaf62584334c674d97256a5169fefa7a76f1d283e335fd23fafd3ab45f10a5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:191b4f74b8892fe464b31b446bc6f50032359ce22cb38236d5fdccf47f27920e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9abccd3ade815397c3a4e9cae178fd4a326a690915052661d8621974d592484a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6001591086387634,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.15128593040847202,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 8.797,
       "eval_steps_per_second": 2.21,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.2972871159853875e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5863075852394104,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.20171457387796268,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.797,
       "eval_steps_per_second": 2.21,
       "step": 150
+    },
+    {
+      "epoch": 0.15229450327786181,
+      "grad_norm": 0.6352901458740234,
+      "learning_rate": 2.583589473684211e-05,
+      "loss": 0.5723,
+      "step": 151
+    },
+    {
+      "epoch": 0.15330307614725164,
+      "grad_norm": 0.6519777774810791,
+      "learning_rate": 2.530863157894737e-05,
+      "loss": 0.5986,
+      "step": 152
+    },
+    {
+      "epoch": 0.15431164901664146,
+      "grad_norm": 0.6912335753440857,
+      "learning_rate": 2.4781368421052633e-05,
+      "loss": 0.6372,
+      "step": 153
+    },
+    {
+      "epoch": 0.15532022188603126,
+      "grad_norm": 0.8081892728805542,
+      "learning_rate": 2.4254105263157896e-05,
+      "loss": 0.6308,
+      "step": 154
+    },
+    {
+      "epoch": 0.15632879475542108,
+      "grad_norm": 0.638839066028595,
+      "learning_rate": 2.372684210526316e-05,
+      "loss": 0.5209,
+      "step": 155
+    },
+    {
+      "epoch": 0.1573373676248109,
+      "grad_norm": 0.8660756945610046,
+      "learning_rate": 2.3199578947368422e-05,
+      "loss": 0.6188,
+      "step": 156
+    },
+    {
+      "epoch": 0.1583459404942007,
+      "grad_norm": 0.8016595244407654,
+      "learning_rate": 2.2672315789473688e-05,
+      "loss": 0.683,
+      "step": 157
+    },
+    {
+      "epoch": 0.15935451336359052,
+      "grad_norm": 0.7723732590675354,
+      "learning_rate": 2.2145052631578948e-05,
+      "loss": 0.5858,
+      "step": 158
+    },
+    {
+      "epoch": 0.16036308623298035,
+      "grad_norm": 0.7663549780845642,
+      "learning_rate": 2.1617789473684214e-05,
+      "loss": 0.6837,
+      "step": 159
+    },
+    {
+      "epoch": 0.16137165910237014,
+      "grad_norm": 0.6378078460693359,
+      "learning_rate": 2.1090526315789473e-05,
+      "loss": 0.5426,
+      "step": 160
+    },
+    {
+      "epoch": 0.16238023197175996,
+      "grad_norm": 0.6299136877059937,
+      "learning_rate": 2.056326315789474e-05,
+      "loss": 0.6205,
+      "step": 161
+    },
+    {
+      "epoch": 0.16338880484114976,
+      "grad_norm": 0.614180326461792,
+      "learning_rate": 2.0036000000000003e-05,
+      "loss": 0.645,
+      "step": 162
+    },
+    {
+      "epoch": 0.16439737771053958,
+      "grad_norm": 0.5849306583404541,
+      "learning_rate": 1.9508736842105266e-05,
+      "loss": 0.5969,
+      "step": 163
+    },
+    {
+      "epoch": 0.1654059505799294,
+      "grad_norm": 0.6921943426132202,
+      "learning_rate": 1.898147368421053e-05,
+      "loss": 0.6696,
+      "step": 164
+    },
+    {
+      "epoch": 0.1664145234493192,
+      "grad_norm": 0.6622946858406067,
+      "learning_rate": 1.8454210526315788e-05,
+      "loss": 0.5939,
+      "step": 165
+    },
+    {
+      "epoch": 0.16742309631870902,
+      "grad_norm": 0.5671906471252441,
+      "learning_rate": 1.7926947368421054e-05,
+      "loss": 0.4853,
+      "step": 166
+    },
+    {
+      "epoch": 0.16843166918809885,
+      "grad_norm": 0.5902379155158997,
+      "learning_rate": 1.7399684210526317e-05,
+      "loss": 0.5543,
+      "step": 167
+    },
+    {
+      "epoch": 0.16944024205748864,
+      "grad_norm": 0.6716185808181763,
+      "learning_rate": 1.687242105263158e-05,
+      "loss": 0.4996,
+      "step": 168
+    },
+    {
+      "epoch": 0.17044881492687847,
+      "grad_norm": 0.6331367492675781,
+      "learning_rate": 1.6345157894736843e-05,
+      "loss": 0.5686,
+      "step": 169
+    },
+    {
+      "epoch": 0.1714573877962683,
+      "grad_norm": 0.6758613586425781,
+      "learning_rate": 1.5817894736842106e-05,
+      "loss": 0.6115,
+      "step": 170
+    },
+    {
+      "epoch": 0.17246596066565809,
+      "grad_norm": 0.631222128868103,
+      "learning_rate": 1.529063157894737e-05,
+      "loss": 0.5845,
+      "step": 171
+    },
+    {
+      "epoch": 0.1734745335350479,
+      "grad_norm": 0.688679575920105,
+      "learning_rate": 1.4763368421052632e-05,
+      "loss": 0.5624,
+      "step": 172
+    },
+    {
+      "epoch": 0.17448310640443773,
+      "grad_norm": 0.6124164462089539,
+      "learning_rate": 1.4236105263157895e-05,
+      "loss": 0.5512,
+      "step": 173
+    },
+    {
+      "epoch": 0.17549167927382753,
+      "grad_norm": 0.7388119101524353,
+      "learning_rate": 1.370884210526316e-05,
+      "loss": 0.5659,
+      "step": 174
+    },
+    {
+      "epoch": 0.17650025214321735,
+      "grad_norm": 0.624447762966156,
+      "learning_rate": 1.318157894736842e-05,
+      "loss": 0.5343,
+      "step": 175
+    },
+    {
+      "epoch": 0.17750882501260717,
+      "grad_norm": 0.7680219411849976,
+      "learning_rate": 1.2654315789473685e-05,
+      "loss": 0.6469,
+      "step": 176
+    },
+    {
+      "epoch": 0.17851739788199697,
+      "grad_norm": 0.6990251541137695,
+      "learning_rate": 1.2127052631578948e-05,
+      "loss": 0.5686,
+      "step": 177
+    },
+    {
+      "epoch": 0.1795259707513868,
+      "grad_norm": 0.625948965549469,
+      "learning_rate": 1.1599789473684211e-05,
+      "loss": 0.5172,
+      "step": 178
+    },
+    {
+      "epoch": 0.1805345436207766,
+      "grad_norm": 0.7253612875938416,
+      "learning_rate": 1.1072526315789474e-05,
+      "loss": 0.648,
+      "step": 179
+    },
+    {
+      "epoch": 0.1815431164901664,
+      "grad_norm": 0.7192074060440063,
+      "learning_rate": 1.0545263157894737e-05,
+      "loss": 0.5826,
+      "step": 180
+    },
+    {
+      "epoch": 0.18255168935955624,
+      "grad_norm": 0.8760311603546143,
+      "learning_rate": 1.0018000000000001e-05,
+      "loss": 0.726,
+      "step": 181
+    },
+    {
+      "epoch": 0.18356026222894603,
+      "grad_norm": 0.7654669284820557,
+      "learning_rate": 9.490736842105264e-06,
+      "loss": 0.7193,
+      "step": 182
+    },
+    {
+      "epoch": 0.18456883509833585,
+      "grad_norm": 0.6888782382011414,
+      "learning_rate": 8.963473684210527e-06,
+      "loss": 0.5643,
+      "step": 183
+    },
+    {
+      "epoch": 0.18557740796772568,
+      "grad_norm": 0.7420192956924438,
+      "learning_rate": 8.43621052631579e-06,
+      "loss": 0.6996,
+      "step": 184
+    },
+    {
+      "epoch": 0.18658598083711547,
+      "grad_norm": 0.761364221572876,
+      "learning_rate": 7.908947368421053e-06,
+      "loss": 0.6823,
+      "step": 185
+    },
+    {
+      "epoch": 0.1875945537065053,
+      "grad_norm": 0.7957232594490051,
+      "learning_rate": 7.381684210526316e-06,
+      "loss": 0.6033,
+      "step": 186
+    },
+    {
+      "epoch": 0.18860312657589512,
+      "grad_norm": 0.7030571103096008,
+      "learning_rate": 6.85442105263158e-06,
+      "loss": 0.6799,
+      "step": 187
+    },
+    {
+      "epoch": 0.18961169944528491,
+      "grad_norm": 0.7977284789085388,
+      "learning_rate": 6.3271578947368425e-06,
+      "loss": 0.6251,
+      "step": 188
+    },
+    {
+      "epoch": 0.19062027231467474,
+      "grad_norm": 0.774628758430481,
+      "learning_rate": 5.7998947368421054e-06,
+      "loss": 0.5933,
+      "step": 189
+    },
+    {
+      "epoch": 0.19162884518406456,
+      "grad_norm": 0.7732921242713928,
+      "learning_rate": 5.272631578947368e-06,
+      "loss": 0.6884,
+      "step": 190
+    },
+    {
+      "epoch": 0.19263741805345436,
+      "grad_norm": 0.8167909979820251,
+      "learning_rate": 4.745368421052632e-06,
+      "loss": 0.6984,
+      "step": 191
+    },
+    {
+      "epoch": 0.19364599092284418,
+      "grad_norm": 0.7652740478515625,
+      "learning_rate": 4.218105263157895e-06,
+      "loss": 0.5643,
+      "step": 192
+    },
+    {
+      "epoch": 0.19465456379223398,
+      "grad_norm": 0.7798756957054138,
+      "learning_rate": 3.690842105263158e-06,
+      "loss": 0.5983,
+      "step": 193
+    },
+    {
+      "epoch": 0.1956631366616238,
+      "grad_norm": 0.894702136516571,
+      "learning_rate": 3.1635789473684213e-06,
+      "loss": 0.7333,
+      "step": 194
+    },
+    {
+      "epoch": 0.19667170953101362,
+      "grad_norm": 0.8882285952568054,
+      "learning_rate": 2.636315789473684e-06,
+      "loss": 0.7199,
+      "step": 195
+    },
+    {
+      "epoch": 0.19768028240040342,
+      "grad_norm": 1.022255778312683,
+      "learning_rate": 2.1090526315789475e-06,
+      "loss": 0.6327,
+      "step": 196
+    },
+    {
+      "epoch": 0.19868885526979324,
+      "grad_norm": 0.9281327128410339,
+      "learning_rate": 1.5817894736842106e-06,
+      "loss": 0.6983,
+      "step": 197
+    },
+    {
+      "epoch": 0.19969742813918306,
+      "grad_norm": 0.9630473256111145,
+      "learning_rate": 1.0545263157894738e-06,
+      "loss": 0.6424,
+      "step": 198
+    },
+    {
+      "epoch": 0.20070600100857286,
+      "grad_norm": 0.9315603971481323,
+      "learning_rate": 5.272631578947369e-07,
+      "loss": 0.6501,
+      "step": 199
+    },
+    {
+      "epoch": 0.20171457387796268,
+      "grad_norm": 1.1420410871505737,
+      "learning_rate": 0.0,
+      "loss": 0.7652,
+      "step": 200
+    },
+    {
+      "epoch": 0.20171457387796268,
+      "eval_loss": 0.5863075852394104,
+      "eval_runtime": 47.2866,
+      "eval_samples_per_second": 8.84,
+      "eval_steps_per_second": 2.221,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.733017139675136e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null