Training in progress, step 25500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8cc3b5dc11d4b2e43886ec06772cc1df8fb96be6f1f9a0270f9f6e971f6d0e2
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:77f8bbdcef942ca91b5ec7bbbd735231f5e4d26584c5cdee0e481501fe23e48b
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3972de8e85ec92a8ee5bab9415d6a3d3f55feb08f92a26b7359e3c1d72344bc5
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f30063e10b99a88358cb2b15c6445572352ad0fdb9ca22aac6f16a615cc3216
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fbd5b30263f6cc98f2bb34e98264f7dd554a2e53f93ad44d0666e3bd6bfb80c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbcd414e2ebe010dcb7c52553aab3fd4fa6d365b6f63593edeab1858ed2ed198
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7471c9521805c24a8ce63003bd5efeaf8bf27814fa393d151be750f3265d98e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:797fe0fbbc0deae3aec7fbd325c4b3b26250213d429ef253b6bc3ac068bea992
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.6666666666666665,
   "eval_steps": 500,
-  "global_step": 25000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1765,6 +1765,41 @@
       "learning_rate": 8.892444444444445e-06,
       "loss": 0.0566,
       "step": 25000
     }
   ],
   "logging_steps": 100,
@@ -1784,7 +1819,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.0895789056e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.7,
   "eval_steps": 500,
+  "global_step": 25500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.892444444444445e-06,
       "loss": 0.0566,
       "step": 25000
+    },
+    {
+      "epoch": 1.6733333333333333,
+      "grad_norm": 0.14110605418682098,
+      "learning_rate": 8.848e-06,
+      "loss": 0.0554,
+      "step": 25100
+    },
+    {
+      "epoch": 1.6800000000000002,
+      "grad_norm": 0.17642982304096222,
+      "learning_rate": 8.803555555555556e-06,
+      "loss": 0.0581,
+      "step": 25200
+    },
+    {
+      "epoch": 1.6866666666666665,
+      "grad_norm": 0.19941475987434387,
+      "learning_rate": 8.759111111111111e-06,
+      "loss": 0.0598,
+      "step": 25300
+    },
+    {
+      "epoch": 1.6933333333333334,
+      "grad_norm": 0.15960603952407837,
+      "learning_rate": 8.714666666666666e-06,
+      "loss": 0.0531,
+      "step": 25400
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 0.17013077437877655,
+      "learning_rate": 8.670222222222223e-06,
+      "loss": 0.0596,
+      "step": 25500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 6.211370483712e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null