ardaspear committed (verified)
Commit 644ad0d · 1 Parent(s): 16d4056

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3fd864bbe67d6da90cc5bda1e550d1ac8c1a1f80f4f0f3b1e9b1ab21db78ca9
+oid sha256:928cf2edd71caf351ebc90e657c55e687ea53f55df2d3e0f185d16524bfe7b4c
 size 599689368
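
The adapter_model.safetensors pointer updated above refers to a PEFT adapter checkpoint. A minimal loading sketch, assuming a causal-LM base and that the checkpoint directory also contains its adapter_config.json; the base model id is a hypothetical placeholder, since it is not recorded in this commit:

from transformers import AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "your-base-model-id"   # hypothetical placeholder, not part of this commit
ADAPTER_DIR = "last-checkpoint"     # directory containing adapter_model.safetensors

# Load the frozen base weights, then attach the adapter from this checkpoint.
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)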
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d80781512712077700f2d3a2d0d034448979e9df7b5a5d4e1f1ce5c0f1bfdd4f
-size 404854552
+oid sha256:f13253f2b0fb38ec20b2cefc02c8e84216eedbddd3cde1f83bfd77addf58484e
+size 404854808
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a273ee62316e02b9a4e88216f8672d92839273f7781c4a93389d224775e64c2
+oid sha256:216c0e407dde1861ae8bd56458455e443750821353d5f0f196d4a06ed8661cbd
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e994acda9463ad5e79f11759cee0746e6d525c82215e6ea2f53a57491ac0869b
+oid sha256:0b536b0fa0a634e1c6dfafee7987ec2b47c88eb25b052693577be75945f4ed90
 size 1064
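
Each of the checkpoint files above is stored as a Git LFS pointer: the repository records only a sha256 object id and a byte size, and this commit swaps the old pointer for the new one. A small sketch for checking a downloaded file against its pointer, using only the standard library:

import hashlib
from pathlib import Path

def matches_lfs_pointer(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Compare a local file against the oid/size recorded in its LFS pointer."""
    p = Path(path)
    if p.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Example: the new optimizer.pt pointer from this commit.
print(matches_lfs_pointer(
    "last-checkpoint/optimizer.pt",
    "f13253f2b0fb38ec20b2cefc02c8e84216eedbddd3cde1f83bfd77addf58484e",
    404854808,
))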
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 11.450271606445312,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.2830188679245283,
+  "epoch": 0.5660377358490566,
   "eval_steps": 50,
-  "global_step": 150,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,135 @@
       "eval_samples_per_second": 21.569,
       "eval_steps_per_second": 5.41,
       "step": 150
+    },
+    {
+      "epoch": 0.3018867924528302,
+      "grad_norm": 31.868478775024414,
+      "learning_rate": 0.0001697631521134985,
+      "loss": 38.3331,
+      "step": 160
+    },
+    {
+      "epoch": 0.32075471698113206,
+      "grad_norm": 25.038724899291992,
+      "learning_rate": 0.00016585113790650388,
+      "loss": 24.7657,
+      "step": 170
+    },
+    {
+      "epoch": 0.33962264150943394,
+      "grad_norm": 53.333683013916016,
+      "learning_rate": 0.0001617524614946192,
+      "loss": 23.8673,
+      "step": 180
+    },
+    {
+      "epoch": 0.3584905660377358,
+      "grad_norm": 183.1505889892578,
+      "learning_rate": 0.0001574787410214407,
+      "loss": 12.851,
+      "step": 190
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "grad_norm": 199.62698364257812,
+      "learning_rate": 0.00015304209081197425,
+      "loss": 3.7041,
+      "step": 200
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "eval_loss": 10.561553001403809,
+      "eval_runtime": 41.4742,
+      "eval_samples_per_second": 21.531,
+      "eval_steps_per_second": 5.401,
+      "step": 200
+    },
+    {
+      "epoch": 0.39622641509433965,
+      "grad_norm": 30.381412506103516,
+      "learning_rate": 0.00014845508703326504,
+      "loss": 36.4437,
+      "step": 210
+    },
+    {
+      "epoch": 0.41509433962264153,
+      "grad_norm": 25.108572006225586,
+      "learning_rate": 0.00014373073204588556,
+      "loss": 24.811,
+      "step": 220
+    },
+    {
+      "epoch": 0.4339622641509434,
+      "grad_norm": 25.86770248413086,
+      "learning_rate": 0.00013888241754733208,
+      "loss": 22.9303,
+      "step": 230
+    },
+    {
+      "epoch": 0.4528301886792453,
+      "grad_norm": 83.9566650390625,
+      "learning_rate": 0.00013392388661180303,
+      "loss": 14.6497,
+      "step": 240
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "grad_norm": 46.80604934692383,
+      "learning_rate": 0.0001288691947339621,
+      "loss": 3.5489,
+      "step": 250
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "eval_loss": 13.115553855895996,
+      "eval_runtime": 41.4443,
+      "eval_samples_per_second": 21.547,
+      "eval_steps_per_second": 5.405,
+      "step": 250
+    },
+    {
+      "epoch": 0.49056603773584906,
+      "grad_norm": 60.517303466796875,
+      "learning_rate": 0.0001237326699871115,
+      "loss": 52.7818,
+      "step": 260
+    },
+    {
+      "epoch": 0.5094339622641509,
+      "grad_norm": 88.43193817138672,
+      "learning_rate": 0.00011852887240871145,
+      "loss": 28.1595,
+      "step": 270
+    },
+    {
+      "epoch": 0.5283018867924528,
+      "grad_norm": 39.37873458862305,
+      "learning_rate": 0.00011327255272837221,
+      "loss": 23.8529,
+      "step": 280
+    },
+    {
+      "epoch": 0.5471698113207547,
+      "grad_norm": 118.77758026123047,
+      "learning_rate": 0.00010797861055530831,
+      "loss": 14.4378,
+      "step": 290
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "grad_norm": 136.2257537841797,
+      "learning_rate": 0.00010266205214377748,
+      "loss": 4.6485,
+      "step": 300
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "eval_loss": 15.816683769226074,
+      "eval_runtime": 41.4418,
+      "eval_samples_per_second": 21.548,
+      "eval_steps_per_second": 5.405,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -158,7 +287,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 2
      }
     },
     "TrainerControl": {
@@ -172,7 +301,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.156569026789376e+17,
+  "total_flos": 2.313138053578752e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null