Training in progress, step 162, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +88 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34eec548bcb0cf4cc8deb16ae9d57e350ba7e83d3565b490b67e7c9c8109cc91
 size 2373352

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe50a8e87d7e7c685703dc07d994a532211ac83774ee5c2ef9e70360c5db801c
 size 2373352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdbb94433eea1e8882e30a772af961f34988ac494568edd94429aa03a48aede4
 size 4830714

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e9122afebc77b0882c330e4f89c8a5852e33b8876416f13418c5d249e452980
 size 4830714

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b731a6453583e531adf674145c44813353f3e2c7ce0df9322ff44e828902c9f9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:28090ec23c7abd202faaf69014ca27c5fb4006ad37677fb4d0e4164b245b5727
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ed7454232ee2a5e2b25a5b6f82cc3a4462ff259aa0925317244942a2aecbea8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:37d3373f4006be09e15478c20075f16407c8e03504d4995b420a93433e54c5dc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.879857063293457,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.8,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,90 @@
       "eval_samples_per_second": 98.658,
       "eval_steps_per_second": 24.936,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1196,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 165268684800000.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.879857063293457,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 3.027906976744186,
   "eval_steps": 50,
+  "global_step": 162,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 98.658,
       "eval_steps_per_second": 24.936,
       "step": 150
+    },
+    {
+      "epoch": 2.818604651162791,
+      "grad_norm": 0.136092871427536,
+      "learning_rate": 1.286667868385627e-06,
+      "loss": 11.8862,
+      "step": 151
+    },
+    {
+      "epoch": 2.8372093023255816,
+      "grad_norm": 0.140702024102211,
+      "learning_rate": 1.064157733632276e-06,
+      "loss": 11.8836,
+      "step": 152
+    },
+    {
+      "epoch": 2.855813953488372,
+      "grad_norm": 0.15527276694774628,
+      "learning_rate": 8.62551347632029e-07,
+      "loss": 11.88,
+      "step": 153
+    },
+    {
+      "epoch": 2.874418604651163,
+      "grad_norm": 0.15577322244644165,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 11.8743,
+      "step": 154
+    },
+    {
+      "epoch": 2.8930232558139535,
+      "grad_norm": 0.1479603350162506,
+      "learning_rate": 5.223853336398632e-07,
+      "loss": 11.8797,
+      "step": 155
+    },
+    {
+      "epoch": 2.911627906976744,
+      "grad_norm": 0.15958736836910248,
+      "learning_rate": 3.839710131477492e-07,
+      "loss": 11.8777,
+      "step": 156
+    },
+    {
+      "epoch": 2.9302325581395348,
+      "grad_norm": 0.20802263915538788,
+      "learning_rate": 2.667509943378721e-07,
+      "loss": 11.8832,
+      "step": 157
+    },
+    {
+      "epoch": 2.948837209302326,
+      "grad_norm": 0.23421141505241394,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 11.8736,
+      "step": 158
+    },
+    {
+      "epoch": 2.967441860465116,
+      "grad_norm": 0.21079802513122559,
+      "learning_rate": 9.60850767065924e-08,
+      "loss": 11.8682,
+      "step": 159
+    },
+    {
+      "epoch": 2.986046511627907,
+      "grad_norm": 0.14213858544826508,
+      "learning_rate": 4.2712080634949024e-08,
+      "loss": 11.8843,
+      "step": 160
+    },
+    {
+      "epoch": 3.0093023255813955,
+      "grad_norm": 0.24924080073833466,
+      "learning_rate": 1.0679160603449534e-08,
+      "loss": 18.4815,
+      "step": 161
+    },
+    {
+      "epoch": 3.027906976744186,
+      "grad_norm": 0.14660771191120148,
+      "learning_rate": 0.0,
+      "loss": 11.3754,
+      "step": 162
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 178490179584000.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null