Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a80492f87314e42ff090e9307e4b4392db25de456d036e9c0ac85865d45656b4
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:27181cf1af8cd6ab09bdd6e86509935d6815cf8e2ba87b51987b3b929dbbf15e
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f9bdcb51d04fdc6df976845ff83533daa2c49434ff7e1a24190a1aefd73688b
 size 43122580

 version https://git-lfs.github.com/spec/v1
+oid sha256:31903c9e1781861fe55c4c0d1bc48a981e34b9e68b3cb04073e11d1775cfbb59
 size 43122580

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9227e96e2467edca784e9e39f29d36aa0aa4c363b3b1ef29f0280163c88be1b0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:355e76fc8598e2798497bc3f2005a10b27ba180fd974991d896736ff11192f57
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9e02dc10b7239989ab9b4418ee704e53fad611ad6b77ad633028bb8eb5238dd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fc7800513a1b4dd006c457152c700dd768bb49ee4ed8e4d9665a4e42095b054
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015278838808250574,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 31.678,
       "eval_steps_per_second": 15.839,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -392,7 +750,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9479016771747840.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.030557677616501147,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.678,
       "eval_steps_per_second": 15.839,
       "step": 50
+    },
+    {
+      "epoch": 0.015584415584415584,
+      "grad_norm": 2.4259822368621826,
+      "learning_rate": 0.00017788772787621126,
+      "loss": 4.0701,
+      "step": 51
+    },
+    {
+      "epoch": 0.015889992360580595,
+      "grad_norm": 1.828935146331787,
+      "learning_rate": 0.00017684011108568592,
+      "loss": 3.8272,
+      "step": 52
+    },
+    {
+      "epoch": 0.016195569136745608,
+      "grad_norm": 1.6015702486038208,
+      "learning_rate": 0.0001757714869760335,
+      "loss": 4.0941,
+      "step": 53
+    },
+    {
+      "epoch": 0.01650114591291062,
+      "grad_norm": 1.4622135162353516,
+      "learning_rate": 0.0001746821476984154,
+      "loss": 2.8427,
+      "step": 54
+    },
+    {
+      "epoch": 0.01680672268907563,
+      "grad_norm": 2.106966018676758,
+      "learning_rate": 0.00017357239106731317,
+      "loss": 2.3866,
+      "step": 55
+    },
+    {
+      "epoch": 0.017112299465240642,
+      "grad_norm": 2.9907472133636475,
+      "learning_rate": 0.00017244252047910892,
+      "loss": 3.2555,
+      "step": 56
+    },
+    {
+      "epoch": 0.017417876241405655,
+      "grad_norm": 1.2781822681427002,
+      "learning_rate": 0.00017129284482913972,
+      "loss": 2.8657,
+      "step": 57
+    },
+    {
+      "epoch": 0.017723453017570664,
+      "grad_norm": 1.5655877590179443,
+      "learning_rate": 0.00017012367842724887,
+      "loss": 2.6976,
+      "step": 58
+    },
+    {
+      "epoch": 0.018029029793735676,
+      "grad_norm": 1.7124484777450562,
+      "learning_rate": 0.0001689353409118566,
+      "loss": 2.5161,
+      "step": 59
+    },
+    {
+      "epoch": 0.01833460656990069,
+      "grad_norm": 2.1622180938720703,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 2.9102,
+      "step": 60
+    },
+    {
+      "epoch": 0.018640183346065698,
+      "grad_norm": 1.4135297536849976,
+      "learning_rate": 0.0001665024572113848,
+      "loss": 1.7485,
+      "step": 61
+    },
+    {
+      "epoch": 0.01894576012223071,
+      "grad_norm": 1.8781421184539795,
+      "learning_rate": 0.00016525857615241687,
+      "loss": 3.8095,
+      "step": 62
+    },
+    {
+      "epoch": 0.019251336898395723,
+      "grad_norm": 1.5626355409622192,
+      "learning_rate": 0.00016399685405033167,
+      "loss": 1.9366,
+      "step": 63
+    },
+    {
+      "epoch": 0.019556913674560732,
+      "grad_norm": 1.412752389907837,
+      "learning_rate": 0.0001627176358473537,
+      "loss": 1.9336,
+      "step": 64
+    },
+    {
+      "epoch": 0.019862490450725745,
+      "grad_norm": 5.998400688171387,
+      "learning_rate": 0.0001614212712689668,
+      "loss": 3.1919,
+      "step": 65
+    },
+    {
+      "epoch": 0.020168067226890758,
+      "grad_norm": 1.5334243774414062,
+      "learning_rate": 0.00016010811472830252,
+      "loss": 2.7113,
+      "step": 66
+    },
+    {
+      "epoch": 0.020473644003055767,
+      "grad_norm": 2.1354057788848877,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 2.988,
+      "step": 67
+    },
+    {
+      "epoch": 0.02077922077922078,
+      "grad_norm": 1.9327161312103271,
+      "learning_rate": 0.00015743286626829437,
+      "loss": 2.9572,
+      "step": 68
+    },
+    {
+      "epoch": 0.021084797555385792,
+      "grad_norm": 2.1718881130218506,
+      "learning_rate": 0.0001560715057351673,
+      "loss": 3.5648,
+      "step": 69
+    },
+    {
+      "epoch": 0.0213903743315508,
+      "grad_norm": 1.8955811262130737,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 3.1244,
+      "step": 70
+    },
+    {
+      "epoch": 0.021695951107715813,
+      "grad_norm": 1.5830830335617065,
+      "learning_rate": 0.0001533031728727994,
+      "loss": 2.7921,
+      "step": 71
+    },
+    {
+      "epoch": 0.022001527883880826,
+      "grad_norm": 2.2739856243133545,
+      "learning_rate": 0.00015189695737812152,
+      "loss": 3.5455,
+      "step": 72
+    },
+    {
+      "epoch": 0.022307104660045835,
+      "grad_norm": 0.9828822612762451,
+      "learning_rate": 0.0001504765537734844,
+      "loss": 1.6882,
+      "step": 73
+    },
+    {
+      "epoch": 0.022612681436210848,
+      "grad_norm": 1.9325013160705566,
+      "learning_rate": 0.00014904235038305083,
+      "loss": 2.1146,
+      "step": 74
+    },
+    {
+      "epoch": 0.02291825821237586,
+      "grad_norm": 1.3537631034851074,
+      "learning_rate": 0.00014759473930370736,
+      "loss": 2.2816,
+      "step": 75
+    },
+    {
+      "epoch": 0.02322383498854087,
+      "grad_norm": 1.826690673828125,
+      "learning_rate": 0.0001461341162978688,
+      "loss": 1.5235,
+      "step": 76
+    },
+    {
+      "epoch": 0.023529411764705882,
+      "grad_norm": 1.8681014776229858,
+      "learning_rate": 0.00014466088068528068,
+      "loss": 4.0703,
+      "step": 77
+    },
+    {
+      "epoch": 0.023834988540870895,
+      "grad_norm": 4.881453514099121,
+      "learning_rate": 0.00014317543523384928,
+      "loss": 3.2547,
+      "step": 78
+    },
+    {
+      "epoch": 0.024140565317035904,
+      "grad_norm": 2.301090955734253,
+      "learning_rate": 0.00014167818604952906,
+      "loss": 2.3378,
+      "step": 79
+    },
+    {
+      "epoch": 0.024446142093200916,
+      "grad_norm": 1.1395305395126343,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 1.5157,
+      "step": 80
+    },
+    {
+      "epoch": 0.02475171886936593,
+      "grad_norm": 1.7658803462982178,
+      "learning_rate": 0.00013864991692924523,
+      "loss": 4.2361,
+      "step": 81
+    },
+    {
+      "epoch": 0.025057295645530938,
+      "grad_norm": 1.827609896659851,
+      "learning_rate": 0.00013711972489182208,
+      "loss": 3.0601,
+      "step": 82
+    },
+    {
+      "epoch": 0.02536287242169595,
+      "grad_norm": 1.9651907682418823,
+      "learning_rate": 0.00013557938469225167,
+      "loss": 3.6306,
+      "step": 83
+    },
+    {
+      "epoch": 0.025668449197860963,
+      "grad_norm": 2.074267625808716,
+      "learning_rate": 0.00013402931744416433,
+      "loss": 2.9667,
+      "step": 84
+    },
+    {
+      "epoch": 0.025974025974025976,
+      "grad_norm": 1.3315553665161133,
+      "learning_rate": 0.00013246994692046836,
+      "loss": 1.88,
+      "step": 85
+    },
+    {
+      "epoch": 0.026279602750190985,
+      "grad_norm": 1.5968420505523682,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 3.7276,
+      "step": 86
+    },
+    {
+      "epoch": 0.026585179526355997,
+      "grad_norm": 1.9459774494171143,
+      "learning_rate": 0.0001293250037384465,
+      "loss": 2.5366,
+      "step": 87
+    },
+    {
+      "epoch": 0.02689075630252101,
+      "grad_norm": 1.9473600387573242,
+      "learning_rate": 0.00012774029087618446,
+      "loss": 3.8513,
+      "step": 88
+    },
+    {
+      "epoch": 0.02719633307868602,
+      "grad_norm": 1.4431513547897339,
+      "learning_rate": 0.00012614799409538198,
+      "loss": 2.5023,
+      "step": 89
+    },
+    {
+      "epoch": 0.02750190985485103,
+      "grad_norm": 3.223552703857422,
+      "learning_rate": 0.00012454854871407994,
+      "loss": 2.0943,
+      "step": 90
+    },
+    {
+      "epoch": 0.027807486631016044,
+      "grad_norm": 2.679762363433838,
+      "learning_rate": 0.00012294239200467516,
+      "loss": 3.3053,
+      "step": 91
+    },
+    {
+      "epoch": 0.028113063407181053,
+      "grad_norm": 2.0697975158691406,
+      "learning_rate": 0.0001213299630743747,
+      "loss": 3.5176,
+      "step": 92
+    },
+    {
+      "epoch": 0.028418640183346066,
+      "grad_norm": 2.661999464035034,
+      "learning_rate": 0.00011971170274514802,
+      "loss": 3.7355,
+      "step": 93
+    },
+    {
+      "epoch": 0.02872421695951108,
+      "grad_norm": 1.8615680932998657,
+      "learning_rate": 0.000118088053433211,
+      "loss": 2.0932,
+      "step": 94
+    },
+    {
+      "epoch": 0.029029793735676088,
+      "grad_norm": 2.583749532699585,
+      "learning_rate": 0.00011645945902807341,
+      "loss": 4.0713,
+      "step": 95
+    },
+    {
+      "epoch": 0.0293353705118411,
+      "grad_norm": 1.8530974388122559,
+      "learning_rate": 0.0001148263647711842,
+      "loss": 1.5702,
+      "step": 96
+    },
+    {
+      "epoch": 0.029640947288006113,
+      "grad_norm": 1.7810598611831665,
+      "learning_rate": 0.00011318921713420691,
+      "loss": 3.71,
+      "step": 97
+    },
+    {
+      "epoch": 0.029946524064171122,
+      "grad_norm": 2.1363232135772705,
+      "learning_rate": 0.00011154846369695863,
+      "loss": 3.1915,
+      "step": 98
+    },
+    {
+      "epoch": 0.030252100840336135,
+      "grad_norm": 1.8341383934020996,
+      "learning_rate": 0.0001099045530250463,
+      "loss": 2.4543,
+      "step": 99
+    },
+    {
+      "epoch": 0.030557677616501147,
+      "grad_norm": 1.8934003114700317,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 2.6079,
+      "step": 100
+    },
+    {
+      "epoch": 0.030557677616501147,
+      "eval_loss": 3.186122417449951,
+      "eval_runtime": 43.6356,
+      "eval_samples_per_second": 31.58,
+      "eval_steps_per_second": 15.79,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.895803354349568e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null