Training in progress, step 3500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:995dadf0d3d095797b5dbc557b2d1541ca751ef767140b2eac486d1f7802f08c
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:8153813283c4389ef5a9863dfbf03bd90de9af06723c2ed7469560d2f6cb9016
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7361a2fab55f925d42ccfef981caf4fa3db8ce7635371819cdbf2b69ea2a0076
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e3c6d028cc61eb16c10cfeb96ae472b416bf61c2430d616409a7e075447463c
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eefe6d642f2fec79a1485caf3f0bf5664dd3a0b7470a19b36fef717b4ce4330f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b21a8df05d6c8e0e6fd608ca76fd60fcfc2fde098551b903447e393817425942
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:920ac20f3da0aa073b783e5c39cbf10201482c9a198f2029422a6d2d7dd4763e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3da069d23e3fc76f1f7013dd495a2b0f4544633e580b179d33500b4d99b2575
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2,
   "eval_steps": 500,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -217,6 +217,41 @@
       "learning_rate": 1.8667555555555555e-05,
       "loss": 0.073,
       "step": 3000
     }
   ],
   "logging_steps": 100,
@@ -236,7 +271,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7307494686720000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.23333333333333334,
   "eval_steps": 500,
+  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.8667555555555555e-05,
       "loss": 0.073,
       "step": 3000
+    },
+    {
+      "epoch": 0.20666666666666667,
+      "grad_norm": 0.28241923451423645,
+      "learning_rate": 1.862311111111111e-05,
+      "loss": 0.0682,
+      "step": 3100
+    },
+    {
+      "epoch": 0.21333333333333335,
+      "grad_norm": 0.2821277976036072,
+      "learning_rate": 1.857866666666667e-05,
+      "loss": 0.0813,
+      "step": 3200
+    },
+    {
+      "epoch": 0.22,
+      "grad_norm": 0.2306368201971054,
+      "learning_rate": 1.8534222222222224e-05,
+      "loss": 0.0741,
+      "step": 3300
+    },
+    {
+      "epoch": 0.22666666666666666,
+      "grad_norm": 0.29834499955177307,
+      "learning_rate": 1.848977777777778e-05,
+      "loss": 0.0801,
+      "step": 3400
+    },
+    {
+      "epoch": 0.23333333333333334,
+      "grad_norm": 1.8099658489227295,
+      "learning_rate": 1.8445333333333334e-05,
+      "loss": 0.0726,
+      "step": 3500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 8525410467840000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null