Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08fc2eb4e2f0efc377f3065fe0ed16dee5f1c97107e0f748c48fd49f7257cb3e
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a2ee4420bd6cb53dbae7f02f04d34af3034c36c6107c17e64fccc40b1296251
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e068b48dda9e0c302972c72dd4138373aef0f7f0a75b92ca990fa04885f166dd
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:98bc37f6187fc846337e5ffe99401504cc47247e04e0803aa36e6d7d933273e6
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c06149525c44ea6d8edf7a015c720bfbce856fe0fc17ae05707281ee481137fc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4fb64d0de4c2ba2d2144595ed8c57dfeffe46759d1c2e86b675198cfb9c7e3b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:191b4f74b8892fe464b31b446bc6f50032359ce22cb38236d5fdccf47f27920e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9abccd3ade815397c3a4e9cae178fd4a326a690915052661d8621974d592484a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7382230758666992,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.4267425320056899,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 51.958,
       "eval_steps_per_second": 12.99,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9599165336125440.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.7155262231826782,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.5689900426742532,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 51.958,
       "eval_steps_per_second": 12.99,
       "step": 150
+    },
+    {
+      "epoch": 0.4295874822190612,
+      "grad_norm": 3.621522903442383,
+      "learning_rate": 2.583589473684211e-05,
+      "loss": 1.4262,
+      "step": 151
+    },
+    {
+      "epoch": 0.43243243243243246,
+      "grad_norm": 8.995145797729492,
+      "learning_rate": 2.530863157894737e-05,
+      "loss": 2.3063,
+      "step": 152
+    },
+    {
+      "epoch": 0.4352773826458037,
+      "grad_norm": 10.451045989990234,
+      "learning_rate": 2.4781368421052633e-05,
+      "loss": 2.4968,
+      "step": 153
+    },
+    {
+      "epoch": 0.43812233285917496,
+      "grad_norm": 5.187588691711426,
+      "learning_rate": 2.4254105263157896e-05,
+      "loss": 2.4841,
+      "step": 154
+    },
+    {
+      "epoch": 0.44096728307254623,
+      "grad_norm": 5.8949785232543945,
+      "learning_rate": 2.372684210526316e-05,
+      "loss": 2.3787,
+      "step": 155
+    },
+    {
+      "epoch": 0.4438122332859175,
+      "grad_norm": 6.260373592376709,
+      "learning_rate": 2.3199578947368422e-05,
+      "loss": 2.3634,
+      "step": 156
+    },
+    {
+      "epoch": 0.4466571834992888,
+      "grad_norm": 5.643683910369873,
+      "learning_rate": 2.2672315789473688e-05,
+      "loss": 2.6186,
+      "step": 157
+    },
+    {
+      "epoch": 0.44950213371266,
+      "grad_norm": 4.2553815841674805,
+      "learning_rate": 2.2145052631578948e-05,
+      "loss": 1.9248,
+      "step": 158
+    },
+    {
+      "epoch": 0.4523470839260313,
+      "grad_norm": 6.2050299644470215,
+      "learning_rate": 2.1617789473684214e-05,
+      "loss": 2.6924,
+      "step": 159
+    },
+    {
+      "epoch": 0.45519203413940257,
+      "grad_norm": 5.974190711975098,
+      "learning_rate": 2.1090526315789473e-05,
+      "loss": 1.9615,
+      "step": 160
+    },
+    {
+      "epoch": 0.45803698435277385,
+      "grad_norm": 4.2593817710876465,
+      "learning_rate": 2.056326315789474e-05,
+      "loss": 1.9096,
+      "step": 161
+    },
+    {
+      "epoch": 0.46088193456614507,
+      "grad_norm": 4.31808614730835,
+      "learning_rate": 2.0036000000000003e-05,
+      "loss": 1.6687,
+      "step": 162
+    },
+    {
+      "epoch": 0.46372688477951635,
+      "grad_norm": 3.8735194206237793,
+      "learning_rate": 1.9508736842105266e-05,
+      "loss": 1.6971,
+      "step": 163
+    },
+    {
+      "epoch": 0.4665718349928876,
+      "grad_norm": 4.893298149108887,
+      "learning_rate": 1.898147368421053e-05,
+      "loss": 1.91,
+      "step": 164
+    },
+    {
+      "epoch": 0.4694167852062589,
+      "grad_norm": 5.0595879554748535,
+      "learning_rate": 1.8454210526315788e-05,
+      "loss": 1.7698,
+      "step": 165
+    },
+    {
+      "epoch": 0.4722617354196302,
+      "grad_norm": 5.496082305908203,
+      "learning_rate": 1.7926947368421054e-05,
+      "loss": 2.2101,
+      "step": 166
+    },
+    {
+      "epoch": 0.4751066856330014,
+      "grad_norm": 3.9685187339782715,
+      "learning_rate": 1.7399684210526317e-05,
+      "loss": 1.7115,
+      "step": 167
+    },
+    {
+      "epoch": 0.4779516358463727,
+      "grad_norm": 3.9401421546936035,
+      "learning_rate": 1.687242105263158e-05,
+      "loss": 1.5449,
+      "step": 168
+    },
+    {
+      "epoch": 0.48079658605974396,
+      "grad_norm": 8.87083911895752,
+      "learning_rate": 1.6345157894736843e-05,
+      "loss": 1.2196,
+      "step": 169
+    },
+    {
+      "epoch": 0.48364153627311524,
+      "grad_norm": 2.921708106994629,
+      "learning_rate": 1.5817894736842106e-05,
+      "loss": 1.2074,
+      "step": 170
+    },
+    {
+      "epoch": 0.4864864864864865,
+      "grad_norm": 3.984959602355957,
+      "learning_rate": 1.529063157894737e-05,
+      "loss": 1.8422,
+      "step": 171
+    },
+    {
+      "epoch": 0.48933143669985774,
+      "grad_norm": 3.140026807785034,
+      "learning_rate": 1.4763368421052632e-05,
+      "loss": 1.1818,
+      "step": 172
+    },
+    {
+      "epoch": 0.492176386913229,
+      "grad_norm": 4.081362724304199,
+      "learning_rate": 1.4236105263157895e-05,
+      "loss": 1.5623,
+      "step": 173
+    },
+    {
+      "epoch": 0.4950213371266003,
+      "grad_norm": 3.8090360164642334,
+      "learning_rate": 1.370884210526316e-05,
+      "loss": 1.7273,
+      "step": 174
+    },
+    {
+      "epoch": 0.49786628733997157,
+      "grad_norm": 4.01789665222168,
+      "learning_rate": 1.318157894736842e-05,
+      "loss": 1.5097,
+      "step": 175
+    },
+    {
+      "epoch": 0.5007112375533428,
+      "grad_norm": 3.7943084239959717,
+      "learning_rate": 1.2654315789473685e-05,
+      "loss": 1.503,
+      "step": 176
+    },
+    {
+      "epoch": 0.5035561877667141,
+      "grad_norm": 4.650012493133545,
+      "learning_rate": 1.2127052631578948e-05,
+      "loss": 1.3104,
+      "step": 177
+    },
+    {
+      "epoch": 0.5064011379800853,
+      "grad_norm": 3.3703951835632324,
+      "learning_rate": 1.1599789473684211e-05,
+      "loss": 1.6413,
+      "step": 178
+    },
+    {
+      "epoch": 0.5092460881934566,
+      "grad_norm": 3.091628074645996,
+      "learning_rate": 1.1072526315789474e-05,
+      "loss": 1.1957,
+      "step": 179
+    },
+    {
+      "epoch": 0.5120910384068279,
+      "grad_norm": 3.4542126655578613,
+      "learning_rate": 1.0545263157894737e-05,
+      "loss": 1.5123,
+      "step": 180
+    },
+    {
+      "epoch": 0.5149359886201992,
+      "grad_norm": 3.7005858421325684,
+      "learning_rate": 1.0018000000000001e-05,
+      "loss": 1.5385,
+      "step": 181
+    },
+    {
+      "epoch": 0.5177809388335705,
+      "grad_norm": 3.9003567695617676,
+      "learning_rate": 9.490736842105264e-06,
+      "loss": 2.0467,
+      "step": 182
+    },
+    {
+      "epoch": 0.5206258890469416,
+      "grad_norm": 4.515058994293213,
+      "learning_rate": 8.963473684210527e-06,
+      "loss": 1.8916,
+      "step": 183
+    },
+    {
+      "epoch": 0.5234708392603129,
+      "grad_norm": 4.124413967132568,
+      "learning_rate": 8.43621052631579e-06,
+      "loss": 2.1368,
+      "step": 184
+    },
+    {
+      "epoch": 0.5263157894736842,
+      "grad_norm": 3.431734085083008,
+      "learning_rate": 7.908947368421053e-06,
+      "loss": 1.7875,
+      "step": 185
+    },
+    {
+      "epoch": 0.5291607396870555,
+      "grad_norm": 3.2502102851867676,
+      "learning_rate": 7.381684210526316e-06,
+      "loss": 1.4588,
+      "step": 186
+    },
+    {
+      "epoch": 0.5320056899004267,
+      "grad_norm": 5.192526340484619,
+      "learning_rate": 6.85442105263158e-06,
+      "loss": 2.0349,
+      "step": 187
+    },
+    {
+      "epoch": 0.534850640113798,
+      "grad_norm": 5.307839393615723,
+      "learning_rate": 6.3271578947368425e-06,
+      "loss": 1.9088,
+      "step": 188
+    },
+    {
+      "epoch": 0.5376955903271693,
+      "grad_norm": 3.8783345222473145,
+      "learning_rate": 5.7998947368421054e-06,
+      "loss": 1.8848,
+      "step": 189
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 3.423018217086792,
+      "learning_rate": 5.272631578947368e-06,
+      "loss": 1.5687,
+      "step": 190
+    },
+    {
+      "epoch": 0.5433854907539118,
+      "grad_norm": 3.4464221000671387,
+      "learning_rate": 4.745368421052632e-06,
+      "loss": 1.74,
+      "step": 191
+    },
+    {
+      "epoch": 0.5462304409672831,
+      "grad_norm": 4.226917266845703,
+      "learning_rate": 4.218105263157895e-06,
+      "loss": 1.5311,
+      "step": 192
+    },
+    {
+      "epoch": 0.5490753911806543,
+      "grad_norm": 3.3859219551086426,
+      "learning_rate": 3.690842105263158e-06,
+      "loss": 1.7129,
+      "step": 193
+    },
+    {
+      "epoch": 0.5519203413940256,
+      "grad_norm": 4.049556255340576,
+      "learning_rate": 3.1635789473684213e-06,
+      "loss": 1.793,
+      "step": 194
+    },
+    {
+      "epoch": 0.5547652916073968,
+      "grad_norm": 3.619959592819214,
+      "learning_rate": 2.636315789473684e-06,
+      "loss": 1.6843,
+      "step": 195
+    },
+    {
+      "epoch": 0.5576102418207681,
+      "grad_norm": 3.7586238384246826,
+      "learning_rate": 2.1090526315789475e-06,
+      "loss": 1.686,
+      "step": 196
+    },
+    {
+      "epoch": 0.5604551920341394,
+      "grad_norm": 5.048487186431885,
+      "learning_rate": 1.5817894736842106e-06,
+      "loss": 1.9462,
+      "step": 197
+    },
+    {
+      "epoch": 0.5633001422475107,
+      "grad_norm": 3.8546323776245117,
+      "learning_rate": 1.0545263157894738e-06,
+      "loss": 1.8565,
+      "step": 198
+    },
+    {
+      "epoch": 0.566145092460882,
+      "grad_norm": 4.808628082275391,
+      "learning_rate": 5.272631578947369e-07,
+      "loss": 1.9341,
+      "step": 199
+    },
+    {
+      "epoch": 0.5689900426742532,
+      "grad_norm": 5.030535697937012,
+      "learning_rate": 0.0,
+      "loss": 2.5086,
+      "step": 200
+    },
+    {
+      "epoch": 0.5689900426742532,
+      "eval_loss": 1.7155262231826782,
+      "eval_runtime": 2.8497,
+      "eval_samples_per_second": 51.936,
+      "eval_steps_per_second": 12.984,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.279888711483392e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null