Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0b1e8f32654f39d5736467676d694f0de8f902e9c3ab48733ecd2da577a587c
 size 402688040

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa316b32500c5a640ac2cf82aee641387e9b0c46bffac547fbb8879c1a231cd7
 size 402688040

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7dc5619dd41fddb9056604f9b7193aafd6ad57258157832ed87dc519fc16d803
 size 204773716

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e5967d6cdb9ea200a8e75865f31a7131dc7c6cea077d286c22c286bacffb72e
 size 204773716

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dffee4d50efec73c2b4bb3effe1f79f8f1bd981275e8475171ddab88d7850a2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0364bbc908c8e14798f388eb619876e43b95720e70096b4e518a18372483fafe
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01c5525f1d8420ca8a81a7fd2ec397a508131d03210dfd36c7ac5758b0e6313b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e88ef6a2716260516e17223973d6a3b0a4c88bf12c72ed47e80e6f2a6782fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5780511498451233,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.2631578947368421,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.968,
       "eval_steps_per_second": 3.242,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.893968039739392e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5751292109489441,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.3508771929824561,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.968,
       "eval_steps_per_second": 3.242,
       "step": 150
+    },
+    {
+      "epoch": 0.2649122807017544,
+      "grad_norm": 0.17568722367286682,
+      "learning_rate": 2.589263157894737e-05,
+      "loss": 0.546,
+      "step": 151
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 0.3429137170314789,
+      "learning_rate": 2.536421052631579e-05,
+      "loss": 0.3165,
+      "step": 152
+    },
+    {
+      "epoch": 0.26842105263157895,
+      "grad_norm": 0.21474985778331757,
+      "learning_rate": 2.483578947368421e-05,
+      "loss": 0.415,
+      "step": 153
+    },
+    {
+      "epoch": 0.27017543859649124,
+      "grad_norm": 0.29766225814819336,
+      "learning_rate": 2.430736842105263e-05,
+      "loss": 0.5736,
+      "step": 154
+    },
+    {
+      "epoch": 0.2719298245614035,
+      "grad_norm": 0.268090158700943,
+      "learning_rate": 2.3778947368421052e-05,
+      "loss": 0.5472,
+      "step": 155
+    },
+    {
+      "epoch": 0.2736842105263158,
+      "grad_norm": 0.2689472436904907,
+      "learning_rate": 2.3250526315789473e-05,
+      "loss": 0.5009,
+      "step": 156
+    },
+    {
+      "epoch": 0.2754385964912281,
+      "grad_norm": 0.23201614618301392,
+      "learning_rate": 2.2722105263157894e-05,
+      "loss": 0.4179,
+      "step": 157
+    },
+    {
+      "epoch": 0.2771929824561403,
+      "grad_norm": 0.3329578936100006,
+      "learning_rate": 2.2193684210526316e-05,
+      "loss": 0.3145,
+      "step": 158
+    },
+    {
+      "epoch": 0.2789473684210526,
+      "grad_norm": 0.30881157517433167,
+      "learning_rate": 2.1665263157894737e-05,
+      "loss": 0.6029,
+      "step": 159
+    },
+    {
+      "epoch": 0.2807017543859649,
+      "grad_norm": 0.3386708199977875,
+      "learning_rate": 2.1136842105263158e-05,
+      "loss": 0.7325,
+      "step": 160
+    },
+    {
+      "epoch": 0.2824561403508772,
+      "grad_norm": 0.2562579810619354,
+      "learning_rate": 2.060842105263158e-05,
+      "loss": 0.402,
+      "step": 161
+    },
+    {
+      "epoch": 0.28421052631578947,
+      "grad_norm": 0.27218446135520935,
+      "learning_rate": 2.008e-05,
+      "loss": 0.4634,
+      "step": 162
+    },
+    {
+      "epoch": 0.28596491228070176,
+      "grad_norm": 0.340582013130188,
+      "learning_rate": 1.9551578947368422e-05,
+      "loss": 0.4829,
+      "step": 163
+    },
+    {
+      "epoch": 0.28771929824561404,
+      "grad_norm": 0.4173396825790405,
+      "learning_rate": 1.9023157894736843e-05,
+      "loss": 0.6958,
+      "step": 164
+    },
+    {
+      "epoch": 0.2894736842105263,
+      "grad_norm": 0.293707013130188,
+      "learning_rate": 1.849473684210526e-05,
+      "loss": 0.5717,
+      "step": 165
+    },
+    {
+      "epoch": 0.2912280701754386,
+      "grad_norm": 0.3465547561645508,
+      "learning_rate": 1.7966315789473686e-05,
+      "loss": 0.6538,
+      "step": 166
+    },
+    {
+      "epoch": 0.2929824561403509,
+      "grad_norm": 0.4900147020816803,
+      "learning_rate": 1.7437894736842107e-05,
+      "loss": 0.4976,
+      "step": 167
+    },
+    {
+      "epoch": 0.29473684210526313,
+      "grad_norm": 0.41154950857162476,
+      "learning_rate": 1.6909473684210525e-05,
+      "loss": 0.7079,
+      "step": 168
+    },
+    {
+      "epoch": 0.2964912280701754,
+      "grad_norm": 0.37281063199043274,
+      "learning_rate": 1.638105263157895e-05,
+      "loss": 0.5842,
+      "step": 169
+    },
+    {
+      "epoch": 0.2982456140350877,
+      "grad_norm": 0.2995673716068268,
+      "learning_rate": 1.5852631578947368e-05,
+      "loss": 0.4967,
+      "step": 170
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 0.35212984681129456,
+      "learning_rate": 1.532421052631579e-05,
+      "loss": 0.47,
+      "step": 171
+    },
+    {
+      "epoch": 0.3017543859649123,
+      "grad_norm": 0.3804338872432709,
+      "learning_rate": 1.4795789473684209e-05,
+      "loss": 0.6148,
+      "step": 172
+    },
+    {
+      "epoch": 0.30350877192982456,
+      "grad_norm": 0.35750070214271545,
+      "learning_rate": 1.4267368421052632e-05,
+      "loss": 0.6549,
+      "step": 173
+    },
+    {
+      "epoch": 0.30526315789473685,
+      "grad_norm": 0.3266410529613495,
+      "learning_rate": 1.3738947368421053e-05,
+      "loss": 0.6267,
+      "step": 174
+    },
+    {
+      "epoch": 0.30701754385964913,
+      "grad_norm": 0.40702515840530396,
+      "learning_rate": 1.3210526315789473e-05,
+      "loss": 0.6736,
+      "step": 175
+    },
+    {
+      "epoch": 0.3087719298245614,
+      "grad_norm": 0.35786283016204834,
+      "learning_rate": 1.2682105263157896e-05,
+      "loss": 0.6763,
+      "step": 176
+    },
+    {
+      "epoch": 0.3105263157894737,
+      "grad_norm": 0.3948259949684143,
+      "learning_rate": 1.2153684210526315e-05,
+      "loss": 0.6734,
+      "step": 177
+    },
+    {
+      "epoch": 0.312280701754386,
+      "grad_norm": 0.37104299664497375,
+      "learning_rate": 1.1625263157894737e-05,
+      "loss": 0.7661,
+      "step": 178
+    },
+    {
+      "epoch": 0.3140350877192982,
+      "grad_norm": 0.3601257801055908,
+      "learning_rate": 1.1096842105263158e-05,
+      "loss": 0.6679,
+      "step": 179
+    },
+    {
+      "epoch": 0.3157894736842105,
+      "grad_norm": 0.45755448937416077,
+      "learning_rate": 1.0568421052631579e-05,
+      "loss": 0.8599,
+      "step": 180
+    },
+    {
+      "epoch": 0.3175438596491228,
+      "grad_norm": 0.40678536891937256,
+      "learning_rate": 1.004e-05,
+      "loss": 0.7713,
+      "step": 181
+    },
+    {
+      "epoch": 0.3192982456140351,
+      "grad_norm": 0.3657882809638977,
+      "learning_rate": 9.511578947368422e-06,
+      "loss": 0.6317,
+      "step": 182
+    },
+    {
+      "epoch": 0.32105263157894737,
+      "grad_norm": 0.752341091632843,
+      "learning_rate": 8.983157894736843e-06,
+      "loss": 0.7552,
+      "step": 183
+    },
+    {
+      "epoch": 0.32280701754385965,
+      "grad_norm": 0.4538853168487549,
+      "learning_rate": 8.454736842105263e-06,
+      "loss": 0.7126,
+      "step": 184
+    },
+    {
+      "epoch": 0.32456140350877194,
+      "grad_norm": 0.5402305722236633,
+      "learning_rate": 7.926315789473684e-06,
+      "loss": 0.7343,
+      "step": 185
+    },
+    {
+      "epoch": 0.3263157894736842,
+      "grad_norm": 0.43681031465530396,
+      "learning_rate": 7.397894736842104e-06,
+      "loss": 0.6744,
+      "step": 186
+    },
+    {
+      "epoch": 0.3280701754385965,
+      "grad_norm": 0.3734006881713867,
+      "learning_rate": 6.8694736842105265e-06,
+      "loss": 0.4868,
+      "step": 187
+    },
+    {
+      "epoch": 0.3298245614035088,
+      "grad_norm": 0.4675087034702301,
+      "learning_rate": 6.341052631578948e-06,
+      "loss": 0.69,
+      "step": 188
+    },
+    {
+      "epoch": 0.33157894736842103,
+      "grad_norm": 0.41553711891174316,
+      "learning_rate": 5.812631578947368e-06,
+      "loss": 0.6899,
+      "step": 189
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.42875123023986816,
+      "learning_rate": 5.2842105263157896e-06,
+      "loss": 0.5848,
+      "step": 190
+    },
+    {
+      "epoch": 0.3350877192982456,
+      "grad_norm": 0.49000978469848633,
+      "learning_rate": 4.755789473684211e-06,
+      "loss": 0.6394,
+      "step": 191
+    },
+    {
+      "epoch": 0.3368421052631579,
+      "grad_norm": 0.5385306477546692,
+      "learning_rate": 4.227368421052631e-06,
+      "loss": 0.6832,
+      "step": 192
+    },
+    {
+      "epoch": 0.3385964912280702,
+      "grad_norm": 0.5240684151649475,
+      "learning_rate": 3.698947368421052e-06,
+      "loss": 0.6332,
+      "step": 193
+    },
+    {
+      "epoch": 0.34035087719298246,
+      "grad_norm": 0.5050160884857178,
+      "learning_rate": 3.170526315789474e-06,
+      "loss": 0.6008,
+      "step": 194
+    },
+    {
+      "epoch": 0.34210526315789475,
+      "grad_norm": 0.41370368003845215,
+      "learning_rate": 2.6421052631578948e-06,
+      "loss": 0.4953,
+      "step": 195
+    },
+    {
+      "epoch": 0.34385964912280703,
+      "grad_norm": 0.5040670037269592,
+      "learning_rate": 2.1136842105263157e-06,
+      "loss": 0.7212,
+      "step": 196
+    },
+    {
+      "epoch": 0.3456140350877193,
+      "grad_norm": 0.48486071825027466,
+      "learning_rate": 1.585263157894737e-06,
+      "loss": 0.4893,
+      "step": 197
+    },
+    {
+      "epoch": 0.3473684210526316,
+      "grad_norm": 0.5510401725769043,
+      "learning_rate": 1.0568421052631578e-06,
+      "loss": 0.6578,
+      "step": 198
+    },
+    {
+      "epoch": 0.34912280701754383,
+      "grad_norm": 0.7318386435508728,
+      "learning_rate": 5.284210526315789e-07,
+      "loss": 0.4288,
+      "step": 199
+    },
+    {
+      "epoch": 0.3508771929824561,
+      "grad_norm": 0.6111555099487305,
+      "learning_rate": 0.0,
+      "loss": 0.4614,
+      "step": 200
+    },
+    {
+      "epoch": 0.3508771929824561,
+      "eval_loss": 0.5751292109489441,
+      "eval_runtime": 18.4962,
+      "eval_samples_per_second": 12.976,
+      "eval_steps_per_second": 3.244,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.871152053157888e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null