Training in progress, step 9500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:336e73efd327d7a1521b5a4d27a9fc5038eee564a716b9140d3d16b364a23a50
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:e739525a605197613b8e712741d416769ed5f57619fff2ba4f8511681ce890ed
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d97c69acfb193391ec999ba15e870e63ab169b8ed9bf00f41fd88c47a66c65b8
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4c2b2a5410ff424a41e1a6cde39521ad45e7ac7555648b29eaff3b782134b0c
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:899366455e0955e5cf5c394ff7d4037b7abb9e5bade054905761b5a1a2bd5b8c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ebf0d13686eae3e0c0f52410d677d657c1203d01171ca9804ddd94825765dcc7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a63facda9ec9ba059fa6f2a71adafa722008934178d5e063ac43b7ad2180ad7f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4e47b661f0982d98bcfee7c8a9db89091e3f5b040309ef2739e290fa07adec1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6,
   "eval_steps": 500,
-  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -637,6 +637,41 @@
       "learning_rate": 1.6001333333333336e-05,
       "loss": 0.0635,
       "step": 9000
     }
   ],
   "logging_steps": 100,
@@ -656,7 +691,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.192248406016e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6333333333333333,
   "eval_steps": 500,
+  "global_step": 9500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.6001333333333336e-05,
       "loss": 0.0635,
       "step": 9000
+    },
+    {
+      "epoch": 0.6066666666666667,
+      "grad_norm": 0.2200908064842224,
+      "learning_rate": 1.595688888888889e-05,
+      "loss": 0.0672,
+      "step": 9100
+    },
+    {
+      "epoch": 0.6133333333333333,
+      "grad_norm": 0.12727037072181702,
+      "learning_rate": 1.5912444444444446e-05,
+      "loss": 0.0689,
+      "step": 9200
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 2.2468693256378174,
+      "learning_rate": 1.5868e-05,
+      "loss": 0.0642,
+      "step": 9300
+    },
+    {
+      "epoch": 0.6266666666666667,
+      "grad_norm": 0.22551049292087555,
+      "learning_rate": 1.5823555555555556e-05,
+      "loss": 0.0721,
+      "step": 9400
+    },
+    {
+      "epoch": 0.6333333333333333,
+      "grad_norm": 0.13281604647636414,
+      "learning_rate": 1.577911111111111e-05,
+      "loss": 0.0635,
+      "step": 9500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 2.314039984128e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null