Training in progress, step 450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8f8826c24088299a847f5820129d27f2c0e2104a9687f918bd1a7f0a94bca21
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:880d82cbf4288a34ac5d5a5484c51cc161a6e279ddaa6031aaf6f0457c2d75fd
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d171f117002fea7d4a1e35bfeb53c60e94190276664b446b9df6f6b99e2dcc1
 size 325340244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c679ab5256053081dc5b24567519d1695787e49627925f1a7826c48f6c87fa4
 size 325340244

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:899d8ab769e09e41875b65f9be514d6bc58a945fb52596b0dc4bd5ac4f6fd3c5
 size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:f998b9f6168b3d155f83f6ebd1241e34434e1ae4be092e5c84373f58ecc53a23
 size 14308

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4be7b38aa7a01f117ebb95bff4f596a12bcbebf1c53b704b1e44c42b56276682
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe3012fb121cd84e6e41f718fd33de9e34ed33145ada7b055f7e25e49408431d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9644750356674194,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.34843205574912894,
   "eval_steps": 50,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -359,6 +359,49 @@
       "eval_samples_per_second": 16.122,
       "eval_steps_per_second": 4.03,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -387,7 +430,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.439937833218867e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9586628079414368,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.39198606271777003,
   "eval_steps": 50,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.122,
       "eval_steps_per_second": 4.03,
       "step": 400
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 0.9414017796516418,
+      "learning_rate": 2.081714761313074e-05,
+      "loss": 0.9142,
+      "step": 410
+    },
+    {
+      "epoch": 0.36585365853658536,
+      "grad_norm": 0.535990297794342,
+      "learning_rate": 1.656275751894957e-05,
+      "loss": 0.9001,
+      "step": 420
+    },
+    {
+      "epoch": 0.37456445993031356,
+      "grad_norm": 0.7875869870185852,
+      "learning_rate": 1.275871237837696e-05,
+      "loss": 0.9874,
+      "step": 430
+    },
+    {
+      "epoch": 0.3832752613240418,
+      "grad_norm": 1.1520731449127197,
+      "learning_rate": 9.423545116956494e-06,
+      "loss": 1.0616,
+      "step": 440
+    },
+    {
+      "epoch": 0.39198606271777003,
+      "grad_norm": 2.8019986152648926,
+      "learning_rate": 6.573504334335994e-06,
+      "loss": 0.9982,
+      "step": 450
+    },
+    {
+      "epoch": 0.39198606271777003,
+      "eval_loss": 0.9586628079414368,
+      "eval_runtime": 30.0265,
+      "eval_samples_per_second": 16.119,
+      "eval_steps_per_second": 4.03,
+      "step": 450
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.3877540397056e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null