ErrorAI committed on
Commit 61d4802 · verified · 1 Parent(s): 59e6f49

Training in progress, step 363, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c1190f8ea07552d2cf7b233831d176ca439c3608aadb29d6f2969f6b5f9f853a
+ oid sha256:70cc9bfc54e5704a0456169a978825609ac71b7b1c9c38de547b1a30937e3ae5
  size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1cbe0698af4298918ce8cfa5d500475d02ffc884ef48ada3d64849c596a2f39a
+ oid sha256:4f0ffc6285cb67ef2253eac085c595861b394d0f4ef88ea3bf8478c9a9b37965
  size 10252116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ed10d462332dfdf5894804ba3ded0da179db213feaaae2c6345c62710844c539
+ oid sha256:a5aecdf1cddcfbe1387576f944dfa673d525856b11837f99ce53a5e262b12e4b
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bf75957cd974601020f5995f38dbafb5420a0acbd3c06be292250cfb4e7d1857
+ oid sha256:3ac5c649b3b07ad0898720745fd06763ece21dd621fd1e465e3bf3dd609d7456
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7536231884057971,
+ "epoch": 1.0020703933747412,
  "eval_steps": 500,
- "global_step": 273,
+ "global_step": 363,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1918,6 +1918,644 @@
  "learning_rate": 1.4800129365390281e-05,
  "loss": 2.1697,
  "step": 273
1921
+ },
1922
+ {
1923
+ "epoch": 0.756383712905452,
1924
+ "grad_norm": 0.6054173707962036,
1925
+ "learning_rate": 1.4489873172477409e-05,
1926
+ "loss": 1.6706,
1927
+ "step": 274
1928
+ },
1929
+ {
1930
+ "epoch": 0.759144237405107,
1931
+ "grad_norm": 0.7372981905937195,
1932
+ "learning_rate": 1.4182351512311237e-05,
1933
+ "loss": 1.7511,
1934
+ "step": 275
1935
+ },
1936
+ {
1937
+ "epoch": 0.7619047619047619,
1938
+ "grad_norm": 0.6251423954963684,
1939
+ "learning_rate": 1.3877588066250453e-05,
1940
+ "loss": 1.423,
1941
+ "step": 276
1942
+ },
1943
+ {
1944
+ "epoch": 0.7646652864044169,
1945
+ "grad_norm": 0.6341333985328674,
1946
+ "learning_rate": 1.357560630325158e-05,
1947
+ "loss": 1.9509,
1948
+ "step": 277
1949
+ },
1950
+ {
1951
+ "epoch": 0.7674258109040718,
1952
+ "grad_norm": 0.6252625584602356,
1953
+ "learning_rate": 1.3276429478061741e-05,
1954
+ "loss": 1.992,
1955
+ "step": 278
1956
+ },
1957
+ {
1958
+ "epoch": 0.7701863354037267,
1959
+ "grad_norm": 0.6359195709228516,
1960
+ "learning_rate": 1.2980080629427904e-05,
1961
+ "loss": 1.9086,
1962
+ "step": 279
1963
+ },
1964
+ {
1965
+ "epoch": 0.7729468599033816,
1966
+ "grad_norm": 0.8791253566741943,
1967
+ "learning_rate": 1.2686582578322631e-05,
1968
+ "loss": 1.8734,
1969
+ "step": 280
1970
+ },
1971
+ {
1972
+ "epoch": 0.7757073844030365,
1973
+ "grad_norm": 0.6764018535614014,
1974
+ "learning_rate": 1.2395957926186803e-05,
1975
+ "loss": 1.83,
1976
+ "step": 281
1977
+ },
1978
+ {
1979
+ "epoch": 0.7784679089026915,
1980
+ "grad_norm": 0.752800703048706,
1981
+ "learning_rate": 1.2108229053189097e-05,
1982
+ "loss": 1.7646,
1983
+ "step": 282
1984
+ },
1985
+ {
1986
+ "epoch": 0.7812284334023465,
1987
+ "grad_norm": 0.7744107842445374,
1988
+ "learning_rate": 1.1823418116502565e-05,
1989
+ "loss": 1.9604,
1990
+ "step": 283
1991
+ },
1992
+ {
1993
+ "epoch": 0.7839889579020014,
1994
+ "grad_norm": 0.8803069591522217,
1995
+ "learning_rate": 1.1541547048598383e-05,
1996
+ "loss": 2.4493,
1997
+ "step": 284
1998
+ },
1999
+ {
2000
+ "epoch": 0.7867494824016563,
2001
+ "grad_norm": 0.834362268447876,
2002
+ "learning_rate": 1.1262637555556903e-05,
2003
+ "loss": 2.0796,
2004
+ "step": 285
2005
+ },
2006
+ {
2007
+ "epoch": 0.7895100069013112,
2008
+ "grad_norm": 1.0516443252563477,
2009
+ "learning_rate": 1.0986711115396058e-05,
2010
+ "loss": 2.3237,
2011
+ "step": 286
2012
+ },
2013
+ {
2014
+ "epoch": 0.7922705314009661,
2015
+ "grad_norm": 0.7121506929397583,
2016
+ "learning_rate": 1.071378897641752e-05,
2017
+ "loss": 2.1616,
2018
+ "step": 287
2019
+ },
2020
+ {
2021
+ "epoch": 0.7950310559006211,
2022
+ "grad_norm": 0.6001153588294983,
2023
+ "learning_rate": 1.044389215557034e-05,
2024
+ "loss": 1.7798,
2025
+ "step": 288
2026
+ },
2027
+ {
2028
+ "epoch": 0.7977915804002761,
2029
+ "grad_norm": 0.8006875514984131,
2030
+ "learning_rate": 1.0177041436832507e-05,
2031
+ "loss": 2.3374,
2032
+ "step": 289
2033
+ },
2034
+ {
2035
+ "epoch": 0.800552104899931,
2036
+ "grad_norm": 0.6787489652633667,
2037
+ "learning_rate": 9.913257369610473e-06,
2038
+ "loss": 2.1637,
2039
+ "step": 290
2040
+ },
2041
+ {
2042
+ "epoch": 0.8033126293995859,
2043
+ "grad_norm": 0.8265159130096436,
2044
+ "learning_rate": 9.652560267156647e-06,
2045
+ "loss": 2.246,
2046
+ "step": 291
2047
+ },
2048
+ {
2049
+ "epoch": 0.8060731538992408,
2050
+ "grad_norm": 1.0507712364196777,
2051
+ "learning_rate": 9.394970205005177e-06,
2052
+ "loss": 1.8864,
2053
+ "step": 292
2054
+ },
2055
+ {
2056
+ "epoch": 0.8088336783988958,
2057
+ "grad_norm": 0.9701467752456665,
2058
+ "learning_rate": 9.140507019425981e-06,
2059
+ "loss": 2.4656,
2060
+ "step": 293
2061
+ },
2062
+ {
2063
+ "epoch": 0.8115942028985508,
2064
+ "grad_norm": 0.9829404950141907,
2065
+ "learning_rate": 8.88919030589721e-06,
2066
+ "loss": 1.8753,
2067
+ "step": 294
2068
+ },
2069
+ {
2070
+ "epoch": 0.8143547273982057,
2071
+ "grad_norm": 1.346711277961731,
2072
+ "learning_rate": 8.64103941759618e-06,
2073
+ "loss": 1.905,
2074
+ "step": 295
2075
+ },
2076
+ {
2077
+ "epoch": 0.8171152518978606,
2078
+ "grad_norm": 1.1422017812728882,
2079
+ "learning_rate": 8.39607346390921e-06,
2080
+ "loss": 2.1189,
2081
+ "step": 296
2082
+ },
2083
+ {
2084
+ "epoch": 0.8198757763975155,
2085
+ "grad_norm": 1.2060613632202148,
2086
+ "learning_rate": 8.15431130895991e-06,
2087
+ "loss": 1.5586,
2088
+ "step": 297
2089
+ },
2090
+ {
2091
+ "epoch": 0.8226363008971704,
2092
+ "grad_norm": 1.0204120874404907,
2093
+ "learning_rate": 7.915771570156554e-06,
2094
+ "loss": 1.854,
2095
+ "step": 298
2096
+ },
2097
+ {
2098
+ "epoch": 0.8253968253968254,
2099
+ "grad_norm": 1.0156620740890503,
2100
+ "learning_rate": 7.680472616758466e-06,
2101
+ "loss": 1.8096,
2102
+ "step": 299
2103
+ },
2104
+ {
2105
+ "epoch": 0.8281573498964804,
2106
+ "grad_norm": 1.82632577419281,
2107
+ "learning_rate": 7.448432568461344e-06,
2108
+ "loss": 1.8969,
2109
+ "step": 300
2110
+ },
2111
+ {
2112
+ "epoch": 0.8309178743961353,
2113
+ "grad_norm": 0.6277664303779602,
2114
+ "learning_rate": 7.219669294002002e-06,
2115
+ "loss": 2.484,
2116
+ "step": 301
2117
+ },
2118
+ {
2119
+ "epoch": 0.8336783988957902,
2120
+ "grad_norm": 0.6709372401237488,
2121
+ "learning_rate": 6.9942004097823535e-06,
2122
+ "loss": 2.0705,
2123
+ "step": 302
2124
+ },
2125
+ {
2126
+ "epoch": 0.8364389233954451,
2127
+ "grad_norm": 0.6607434153556824,
2128
+ "learning_rate": 6.7720432785127465e-06,
2129
+ "loss": 1.9909,
2130
+ "step": 303
2131
+ },
2132
+ {
2133
+ "epoch": 0.8391994478951,
2134
+ "grad_norm": 0.5632132291793823,
2135
+ "learning_rate": 6.553215007874985e-06,
2136
+ "loss": 1.9455,
2137
+ "step": 304
2138
+ },
2139
+ {
2140
+ "epoch": 0.841959972394755,
2141
+ "grad_norm": 0.541318416595459,
2142
+ "learning_rate": 6.337732449204886e-06,
2143
+ "loss": 2.0353,
2144
+ "step": 305
2145
+ },
2146
+ {
2147
+ "epoch": 0.84472049689441,
2148
+ "grad_norm": 0.7076634168624878,
2149
+ "learning_rate": 6.1256121961945915e-06,
2150
+ "loss": 1.7995,
2151
+ "step": 306
2152
+ },
2153
+ {
2154
+ "epoch": 0.8474810213940649,
2155
+ "grad_norm": 0.6370120644569397,
2156
+ "learning_rate": 5.916870583614792e-06,
2157
+ "loss": 1.9695,
2158
+ "step": 307
2159
+ },
2160
+ {
2161
+ "epoch": 0.8502415458937198,
2162
+ "grad_norm": 0.5746281147003174,
2163
+ "learning_rate": 5.711523686056769e-06,
2164
+ "loss": 2.2275,
2165
+ "step": 308
2166
+ },
2167
+ {
2168
+ "epoch": 0.8530020703933747,
2169
+ "grad_norm": 0.6816925406455994,
2170
+ "learning_rate": 5.509587316694536e-06,
2171
+ "loss": 1.8413,
2172
+ "step": 309
2173
+ },
2174
+ {
2175
+ "epoch": 0.8557625948930296,
2176
+ "grad_norm": 0.7342681288719177,
2177
+ "learning_rate": 5.311077026067196e-06,
2178
+ "loss": 2.1986,
2179
+ "step": 310
2180
+ },
2181
+ {
2182
+ "epoch": 0.8585231193926847,
2183
+ "grad_norm": 0.6418979167938232,
2184
+ "learning_rate": 5.116008100881348e-06,
2185
+ "loss": 1.8917,
2186
+ "step": 311
2187
+ },
2188
+ {
2189
+ "epoch": 0.8612836438923396,
2190
+ "grad_norm": 0.6395264863967896,
2191
+ "learning_rate": 4.924395562833933e-06,
2192
+ "loss": 1.9768,
2193
+ "step": 312
2194
+ },
2195
+ {
2196
+ "epoch": 0.8640441683919945,
2197
+ "grad_norm": 0.6867514252662659,
2198
+ "learning_rate": 4.736254167455473e-06,
2199
+ "loss": 2.2776,
2200
+ "step": 313
2201
+ },
2202
+ {
2203
+ "epoch": 0.8668046928916494,
2204
+ "grad_norm": 0.5997049808502197,
2205
+ "learning_rate": 4.5515984029737615e-06,
2206
+ "loss": 1.8973,
2207
+ "step": 314
2208
+ },
2209
+ {
2210
+ "epoch": 0.8695652173913043,
2211
+ "grad_norm": 0.7374957799911499,
2212
+ "learning_rate": 4.370442489198179e-06,
2213
+ "loss": 1.8923,
2214
+ "step": 315
2215
+ },
2216
+ {
2217
+ "epoch": 0.8723257418909592,
2218
+ "grad_norm": 0.809410035610199,
2219
+ "learning_rate": 4.1928003764246934e-06,
2220
+ "loss": 2.3245,
2221
+ "step": 316
2222
+ },
2223
+ {
2224
+ "epoch": 0.8750862663906143,
2225
+ "grad_norm": 0.5919080972671509,
2226
+ "learning_rate": 4.018685744361539e-06,
2227
+ "loss": 1.9618,
2228
+ "step": 317
2229
+ },
2230
+ {
2231
+ "epoch": 0.8778467908902692,
2232
+ "grad_norm": 0.6320842504501343,
2233
+ "learning_rate": 3.84811200107581e-06,
2234
+ "loss": 1.6811,
2235
+ "step": 318
2236
+ },
2237
+ {
2238
+ "epoch": 0.8806073153899241,
2239
+ "grad_norm": 0.7248905301094055,
2240
+ "learning_rate": 3.6810922819609352e-06,
2241
+ "loss": 2.2784,
2242
+ "step": 319
2243
+ },
2244
+ {
2245
+ "epoch": 0.883367839889579,
2246
+ "grad_norm": 0.5608075261116028,
2247
+ "learning_rate": 3.517639448725163e-06,
2248
+ "loss": 1.6641,
2249
+ "step": 320
2250
+ },
2251
+ {
2252
+ "epoch": 0.8861283643892339,
2253
+ "grad_norm": 0.6745538711547852,
2254
+ "learning_rate": 3.3577660884011485e-06,
2255
+ "loss": 2.1938,
2256
+ "step": 321
2257
+ },
2258
+ {
2259
+ "epoch": 0.8888888888888888,
2260
+ "grad_norm": 0.6236209273338318,
2261
+ "learning_rate": 3.2014845123765734e-06,
2262
+ "loss": 1.7981,
2263
+ "step": 322
2264
+ },
2265
+ {
2266
+ "epoch": 0.8916494133885439,
2267
+ "grad_norm": 0.7572169303894043,
2268
+ "learning_rate": 3.0488067554461818e-06,
2269
+ "loss": 1.9801,
2270
+ "step": 323
2271
+ },
2272
+ {
2273
+ "epoch": 0.8944099378881988,
2274
+ "grad_norm": 0.5115662217140198,
2275
+ "learning_rate": 2.8997445748849716e-06,
2276
+ "loss": 1.6648,
2277
+ "step": 324
2278
+ },
2279
+ {
2280
+ "epoch": 0.8971704623878537,
2281
+ "grad_norm": 0.7543956637382507,
2282
+ "learning_rate": 2.7543094495427913e-06,
2283
+ "loss": 1.8281,
2284
+ "step": 325
2285
+ },
2286
+ {
2287
+ "epoch": 0.8999309868875086,
2288
+ "grad_norm": 0.5938908457756042,
2289
+ "learning_rate": 2.612512578960391e-06,
2290
+ "loss": 1.9528,
2291
+ "step": 326
2292
+ },
2293
+ {
2294
+ "epoch": 0.9026915113871635,
2295
+ "grad_norm": 0.6712254881858826,
2296
+ "learning_rate": 2.474364882507002e-06,
2297
+ "loss": 2.0534,
2298
+ "step": 327
2299
+ },
2300
+ {
2301
+ "epoch": 0.9054520358868184,
2302
+ "grad_norm": 0.5828601121902466,
2303
+ "learning_rate": 2.339876998539442e-06,
2304
+ "loss": 1.7284,
2305
+ "step": 328
2306
+ },
2307
+ {
2308
+ "epoch": 0.9082125603864735,
2309
+ "grad_norm": 0.8522250056266785,
2310
+ "learning_rate": 2.2090592835828814e-06,
2311
+ "loss": 2.1892,
2312
+ "step": 329
2313
+ },
2314
+ {
2315
+ "epoch": 0.9109730848861284,
2316
+ "grad_norm": 0.9318490028381348,
2317
+ "learning_rate": 2.081921811533366e-06,
2318
+ "loss": 2.1246,
2319
+ "step": 330
2320
+ },
2321
+ {
2322
+ "epoch": 0.9137336093857833,
2323
+ "grad_norm": 0.7124171257019043,
2324
+ "learning_rate": 1.9584743728819686e-06,
2325
+ "loss": 1.8016,
2326
+ "step": 331
2327
+ },
2328
+ {
2329
+ "epoch": 0.9164941338854382,
2330
+ "grad_norm": 0.9738188982009888,
2331
+ "learning_rate": 1.83872647396095e-06,
2332
+ "loss": 2.3653,
2333
+ "step": 332
2334
+ },
2335
+ {
2336
+ "epoch": 0.9192546583850931,
2337
+ "grad_norm": 0.7715499401092529,
2338
+ "learning_rate": 1.7226873362116257e-06,
2339
+ "loss": 2.4177,
2340
+ "step": 333
2341
+ },
2342
+ {
2343
+ "epoch": 0.9220151828847482,
2344
+ "grad_norm": 0.7315263748168945,
2345
+ "learning_rate": 1.6103658954742918e-06,
2346
+ "loss": 1.8725,
2347
+ "step": 334
2348
+ },
2349
+ {
2350
+ "epoch": 0.9247757073844031,
2351
+ "grad_norm": 0.7532916069030762,
2352
+ "learning_rate": 1.5017708013000786e-06,
2353
+ "loss": 2.2835,
2354
+ "step": 335
2355
+ },
2356
+ {
2357
+ "epoch": 0.927536231884058,
2358
+ "grad_norm": 0.7594407796859741,
2359
+ "learning_rate": 1.396910416284891e-06,
2360
+ "loss": 2.0195,
2361
+ "step": 336
2362
+ },
2363
+ {
2364
+ "epoch": 0.9302967563837129,
2365
+ "grad_norm": 0.9144908785820007,
2366
+ "learning_rate": 1.2957928154254172e-06,
2367
+ "loss": 2.2633,
2368
+ "step": 337
2369
+ },
2370
+ {
2371
+ "epoch": 0.9330572808833678,
2372
+ "grad_norm": 0.7762795090675354,
2373
+ "learning_rate": 1.1984257854973147e-06,
2374
+ "loss": 2.1501,
2375
+ "step": 338
2376
+ },
2377
+ {
2378
+ "epoch": 0.9358178053830227,
2379
+ "grad_norm": 1.0061681270599365,
2380
+ "learning_rate": 1.1048168244555513e-06,
2381
+ "loss": 2.2689,
2382
+ "step": 339
2383
+ },
2384
+ {
2385
+ "epoch": 0.9385783298826778,
2386
+ "grad_norm": 0.7763106822967529,
2387
+ "learning_rate": 1.0149731408569951e-06,
2388
+ "loss": 1.7844,
2389
+ "step": 340
2390
+ },
2391
+ {
2392
+ "epoch": 0.9413388543823327,
2393
+ "grad_norm": 0.7826094031333923,
2394
+ "learning_rate": 9.289016533053696e-07,
2395
+ "loss": 1.741,
2396
+ "step": 341
2397
+ },
2398
+ {
2399
+ "epoch": 0.9440993788819876,
2400
+ "grad_norm": 1.0212883949279785,
2401
+ "learning_rate": 8.46608989918396e-07,
2402
+ "loss": 1.9246,
2403
+ "step": 342
2404
+ },
2405
+ {
2406
+ "epoch": 0.9468599033816425,
2407
+ "grad_norm": 0.7793722748756409,
2408
+ "learning_rate": 7.681014878174187e-07,
2409
+ "loss": 1.6821,
2410
+ "step": 343
2411
+ },
2412
+ {
2413
+ "epoch": 0.9496204278812974,
2414
+ "grad_norm": 0.7322463989257812,
2415
+ "learning_rate": 6.933851926394175e-07,
2416
+ "loss": 1.7588,
2417
+ "step": 344
2418
+ },
2419
+ {
2420
+ "epoch": 0.9523809523809523,
2421
+ "grad_norm": 0.850485622882843,
2422
+ "learning_rate": 6.224658580713971e-07,
2423
+ "loss": 1.9027,
2424
+ "step": 345
2425
+ },
2426
+ {
2427
+ "epoch": 0.9551414768806074,
2428
+ "grad_norm": 0.9016429781913757,
2429
+ "learning_rate": 5.55348945407369e-07,
2430
+ "loss": 1.9853,
2431
+ "step": 346
2432
+ },
2433
+ {
2434
+ "epoch": 0.9579020013802623,
2435
+ "grad_norm": 0.9636366963386536,
2436
+ "learning_rate": 4.920396231277713e-07,
2437
+ "loss": 1.6812,
2438
+ "step": 347
2439
+ },
2440
+ {
2441
+ "epoch": 0.9606625258799172,
2442
+ "grad_norm": 0.7816176414489746,
2443
+ "learning_rate": 4.3254276650143144e-07,
2444
+ "loss": 1.7355,
2445
+ "step": 348
2446
+ },
2447
+ {
2448
+ "epoch": 0.9634230503795721,
2449
+ "grad_norm": 1.2527109384536743,
2450
+ "learning_rate": 3.7686295721018893e-07,
2451
+ "loss": 2.1681,
2452
+ "step": 349
2453
+ },
2454
+ {
2455
+ "epoch": 0.966183574879227,
2456
+ "grad_norm": 2.706392288208008,
2457
+ "learning_rate": 3.2500448299603305e-07,
2458
+ "loss": 2.2018,
2459
+ "step": 350
2460
+ },
2461
+ {
2462
+ "epoch": 0.968944099378882,
2463
+ "grad_norm": 0.5852137804031372,
2464
+ "learning_rate": 2.769713373309168e-07,
2465
+ "loss": 2.2764,
2466
+ "step": 351
2467
+ },
2468
+ {
2469
+ "epoch": 0.971704623878537,
2470
+ "grad_norm": 0.5416120290756226,
2471
+ "learning_rate": 2.3276721910926448e-07,
2472
+ "loss": 2.1675,
2473
+ "step": 352
2474
+ },
2475
+ {
2476
+ "epoch": 0.9744651483781919,
2477
+ "grad_norm": 0.5589115619659424,
2478
+ "learning_rate": 1.923955323630877e-07,
2479
+ "loss": 2.0924,
2480
+ "step": 353
2481
+ },
2482
+ {
2483
+ "epoch": 0.9772256728778468,
2484
+ "grad_norm": 0.582391619682312,
2485
+ "learning_rate": 1.5585938599989025e-07,
2486
+ "loss": 1.9631,
2487
+ "step": 354
2488
+ },
2489
+ {
2490
+ "epoch": 0.9799861973775017,
2491
+ "grad_norm": 0.6033303141593933,
2492
+ "learning_rate": 1.231615935632313e-07,
2493
+ "loss": 1.5731,
2494
+ "step": 355
2495
+ },
2496
+ {
2497
+ "epoch": 0.9827467218771566,
2498
+ "grad_norm": 0.7292796969413757,
2499
+ "learning_rate": 9.430467301607682e-08,
2500
+ "loss": 2.1532,
2501
+ "step": 356
2502
+ },
2503
+ {
2504
+ "epoch": 0.9855072463768116,
2505
+ "grad_norm": 0.768563449382782,
2506
+ "learning_rate": 6.929084654688222e-08,
2507
+ "loss": 2.1209,
2508
+ "step": 357
2509
+ },
2510
+ {
2511
+ "epoch": 0.9882677708764666,
2512
+ "grad_norm": 0.7994271516799927,
2513
+ "learning_rate": 4.8122040398496105e-08,
2514
+ "loss": 2.1563,
2515
+ "step": 358
2516
+ },
2517
+ {
2518
+ "epoch": 0.9910282953761215,
2519
+ "grad_norm": 0.800473690032959,
2520
+ "learning_rate": 3.0799884719795444e-08,
2521
+ "loss": 2.2567,
2522
+ "step": 359
2523
+ },
2524
+ {
2525
+ "epoch": 0.9937888198757764,
2526
+ "grad_norm": 0.6938081383705139,
2527
+ "learning_rate": 1.7325713440180524e-08,
2528
+ "loss": 1.8412,
2529
+ "step": 360
2530
+ },
2531
+ {
2532
+ "epoch": 0.9965493443754313,
2533
+ "grad_norm": 1.0545095205307007,
2534
+ "learning_rate": 7.700564166834844e-09,
2535
+ "loss": 2.372,
2536
+ "step": 361
2537
+ },
2538
+ {
2539
+ "epoch": 0.9993098688750862,
2540
+ "grad_norm": 0.8724227547645569,
2541
+ "learning_rate": 1.9251781048168493e-09,
2542
+ "loss": 1.7168,
2543
+ "step": 362
2544
+ },
2545
+ {
2546
+ "epoch": 0.9993098688750862,
2547
+ "eval_loss": 2.0619454383850098,
2548
+ "eval_runtime": 5.2483,
2549
+ "eval_samples_per_second": 58.114,
2550
+ "eval_steps_per_second": 14.671,
2551
+ "step": 362
2552
+ },
2553
+ {
2554
+ "epoch": 1.0020703933747412,
2555
+ "grad_norm": 2.523097276687622,
2556
+ "learning_rate": 0.0,
2557
+ "loss": 4.4299,
2558
+ "step": 363
2559
  }
2560
  ],
2561
  "logging_steps": 1,
 
2570
  "should_evaluate": false,
2571
  "should_log": false,
2572
  "should_save": true,
2573
+ "should_training_stop": true
2574
  },
2575
  "attributes": {}
2576
  }
2577
  },
2578
+ "total_flos": 1.138052632608768e+16,
2579
  "train_batch_size": 4,
2580
  "trial_name": null,
2581
  "trial_params": null