Training in progress, step 204, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +95 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:952d1a882b9cca3451ed6a97b532359cc908cc7e228144834369739e7d673517
 size 132164608

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c931d5dcab4934d664c86b7b9f1cc7cd35a706b9856206ace9d697eb010c61d
 size 132164608

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebd9b3809508d84f0b43948535f48349685b3070faa9b0780a120c15d7f6a8d6
 size 67487892

 version https://git-lfs.github.com/spec/v1
+oid sha256:29b3f55f7dbe9de2ff2648d336ff782d60d2eaf477c10452e4200763918493d8
 size 67487892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2009f4880cf0cac08ca9532dea6b28d8de9f65ae7c47b80222df55d29c632810
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6936f48172630f3b195d633d05bbdd084bbd64378cb9f0296e98ae7438be100
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f21ce5519aba36efeb75a8dad39ab6bd85bd42d0ae24cbc1f5cfa5d96741b8bc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f9839d107756d9c8815de9164f2ebf92c05b3536704a349ca5892084df7663e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.09447068630175048,
   "eval_steps": 34,
-  "global_step": 170,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -447,6 +447,98 @@
       "eval_samples_per_second": 7.825,
       "eval_steps_per_second": 0.978,
       "step": 170
     }
   ],
   "logging_steps": 3,
@@ -466,7 +558,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.37712210540544e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11336482356210058,
   "eval_steps": 34,
+  "global_step": 204,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.825,
       "eval_steps_per_second": 0.978,
       "step": 170
+    },
+    {
+      "epoch": 0.09502639622117255,
+      "grad_norm": 1.5192731618881226,
+      "learning_rate": 3.17617799075421e-05,
+      "loss": 4.9727,
+      "step": 171
+    },
+    {
+      "epoch": 0.09669352597943873,
+      "grad_norm": 2.1325037479400635,
+      "learning_rate": 3.1178227669141744e-05,
+      "loss": 5.287,
+      "step": 174
+    },
+    {
+      "epoch": 0.09836065573770492,
+      "grad_norm": 1.6394548416137695,
+      "learning_rate": 3.0591067519763895e-05,
+      "loss": 5.0878,
+      "step": 177
+    },
+    {
+      "epoch": 0.1000277854959711,
+      "grad_norm": 1.954785704612732,
+      "learning_rate": 3.0000642344401113e-05,
+      "loss": 5.7474,
+      "step": 180
+    },
+    {
+      "epoch": 0.1016949152542373,
+      "grad_norm": 1.7333064079284668,
+      "learning_rate": 2.9407296934729227e-05,
+      "loss": 5.2069,
+      "step": 183
+    },
+    {
+      "epoch": 0.10336204501250347,
+      "grad_norm": 1.7775465250015259,
+      "learning_rate": 2.8811377787758636e-05,
+      "loss": 4.8365,
+      "step": 186
+    },
+    {
+      "epoch": 0.10502917477076966,
+      "grad_norm": 1.766340970993042,
+      "learning_rate": 2.8213232903489865e-05,
+      "loss": 4.8806,
+      "step": 189
+    },
+    {
+      "epoch": 0.10669630452903585,
+      "grad_norm": 2.064275026321411,
+      "learning_rate": 2.761321158169134e-05,
+      "loss": 5.1876,
+      "step": 192
+    },
+    {
+      "epoch": 0.10836343428730202,
+      "grad_norm": 1.731985330581665,
+      "learning_rate": 2.7011664217918154e-05,
+      "loss": 4.6924,
+      "step": 195
+    },
+    {
+      "epoch": 0.11003056404556821,
+      "grad_norm": 1.8852187395095825,
+      "learning_rate": 2.6408942098890936e-05,
+      "loss": 5.0911,
+      "step": 198
+    },
+    {
+      "epoch": 0.1116976938038344,
+      "grad_norm": 1.8446505069732666,
+      "learning_rate": 2.580539719735433e-05,
+      "loss": 5.0379,
+      "step": 201
+    },
+    {
+      "epoch": 0.11336482356210058,
+      "grad_norm": 1.863871455192566,
+      "learning_rate": 2.5201381966534748e-05,
+      "loss": 5.3173,
+      "step": 204
+    },
+    {
+      "epoch": 0.11336482356210058,
+      "eval_loss": 1.3148518800735474,
+      "eval_runtime": 387.4809,
+      "eval_samples_per_second": 7.822,
+      "eval_steps_per_second": 0.978,
+      "step": 204
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 4.07623549249536e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null