Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7232ce445ce21794ff0daba2317b9407ca9b1a45f6ac3b44c1e692d3b441130e
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:da4c89f143d999124bdacb9b290d298d78cc505e509f2a51a37ccca25790d307
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a726437f3198f8765c2ea68a03755577cdd399dd24ff50ea7cef356632a7236f
 size 71078228

 version https://git-lfs.github.com/spec/v1
+oid sha256:be976d6e61c6ab4c3d5148baf847995e427bc36835857c0cce996e287c5d3bf0
 size 71078228

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fc6b6fb38394dadb9326af0c9bc16f8c4534d68a42c487cddd006f3b0a51896
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9ed452989e5e79c151afa0a4957fcce7781a6a4c9b513d77b361538688e55a2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2723369598388672,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.019652156824211457,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 60.085,
       "eval_steps_per_second": 15.022,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.4041078390784e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.271822214126587,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.022459607799098807,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 60.085,
       "eval_steps_per_second": 15.022,
       "step": 350
+    },
+    {
+      "epoch": 0.019708305843709206,
+      "grad_norm": 0.36800873279571533,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 1.3682,
+      "step": 351
+    },
+    {
+      "epoch": 0.01976445486320695,
+      "grad_norm": 0.3377681076526642,
+      "learning_rate": 3.691267552111183e-06,
+      "loss": 1.3038,
+      "step": 352
+    },
+    {
+      "epoch": 0.019820603882704697,
+      "grad_norm": 0.34921079874038696,
+      "learning_rate": 3.54088980417534e-06,
+      "loss": 1.2905,
+      "step": 353
+    },
+    {
+      "epoch": 0.019876752902202446,
+      "grad_norm": 0.3415435552597046,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 1.3048,
+      "step": 354
+    },
+    {
+      "epoch": 0.01993290192170019,
+      "grad_norm": 0.385629802942276,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 1.2855,
+      "step": 355
+    },
+    {
+      "epoch": 0.01998905094119794,
+      "grad_norm": 0.33783087134361267,
+      "learning_rate": 3.1078826033397843e-06,
+      "loss": 1.2968,
+      "step": 356
+    },
+    {
+      "epoch": 0.020045199960695686,
+      "grad_norm": 0.37677091360092163,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 1.3106,
+      "step": 357
+    },
+    {
+      "epoch": 0.020101348980193435,
+      "grad_norm": 0.3581361770629883,
+      "learning_rate": 2.8344093371128424e-06,
+      "loss": 1.343,
+      "step": 358
+    },
+    {
+      "epoch": 0.02015749799969118,
+      "grad_norm": 0.34285494685173035,
+      "learning_rate": 2.70225907856374e-06,
+      "loss": 1.3371,
+      "step": 359
+    },
+    {
+      "epoch": 0.020213647019188926,
+      "grad_norm": 0.35909414291381836,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 1.3167,
+      "step": 360
+    },
+    {
+      "epoch": 0.020269796038686675,
+      "grad_norm": 0.3384222388267517,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.2706,
+      "step": 361
+    },
+    {
+      "epoch": 0.02032594505818442,
+      "grad_norm": 0.3651668131351471,
+      "learning_rate": 2.324256102563188e-06,
+      "loss": 1.3289,
+      "step": 362
+    },
+    {
+      "epoch": 0.02038209407768217,
+      "grad_norm": 0.34311142563819885,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 1.2932,
+      "step": 363
+    },
+    {
+      "epoch": 0.020438243097179915,
+      "grad_norm": 0.3421182632446289,
+      "learning_rate": 2.087708544541689e-06,
+      "loss": 1.2895,
+      "step": 364
+    },
+    {
+      "epoch": 0.020494392116677664,
+      "grad_norm": 0.364944726228714,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 1.3483,
+      "step": 365
+    },
+    {
+      "epoch": 0.02055054113617541,
+      "grad_norm": 0.3558295667171478,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 1.3057,
+      "step": 366
+    },
+    {
+      "epoch": 0.020606690155673155,
+      "grad_norm": 0.35519397258758545,
+      "learning_rate": 1.7562223328224325e-06,
+      "loss": 1.3091,
+      "step": 367
+    },
+    {
+      "epoch": 0.020662839175170904,
+      "grad_norm": 0.3448637127876282,
+      "learning_rate": 1.6519785107311891e-06,
+      "loss": 1.2512,
+      "step": 368
+    },
+    {
+      "epoch": 0.02071898819466865,
+      "grad_norm": 0.3545093238353729,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 1.3812,
+      "step": 369
+    },
+    {
+      "epoch": 0.0207751372141664,
+      "grad_norm": 0.35398516058921814,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 1.3211,
+      "step": 370
+    },
+    {
+      "epoch": 0.020831286233664145,
+      "grad_norm": 0.3714781403541565,
+      "learning_rate": 1.358096486081778e-06,
+      "loss": 1.333,
+      "step": 371
+    },
+    {
+      "epoch": 0.02088743525316189,
+      "grad_norm": 0.35507217049598694,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 1.3185,
+      "step": 372
+    },
+    {
+      "epoch": 0.02094358427265964,
+      "grad_norm": 0.3716376721858978,
+      "learning_rate": 1.1779460585363944e-06,
+      "loss": 1.2996,
+      "step": 373
+    },
+    {
+      "epoch": 0.020999733292157385,
+      "grad_norm": 0.36729657649993896,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.2479,
+      "step": 374
+    },
+    {
+      "epoch": 0.021055882311655134,
+      "grad_norm": 0.35884541273117065,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 1.3301,
+      "step": 375
+    },
+    {
+      "epoch": 0.02111203133115288,
+      "grad_norm": 0.3605683445930481,
+      "learning_rate": 9.314936930293283e-07,
+      "loss": 1.2289,
+      "step": 376
+    },
+    {
+      "epoch": 0.02116818035065063,
+      "grad_norm": 0.3460223078727722,
+      "learning_rate": 8.557039732283944e-07,
+      "loss": 1.237,
+      "step": 377
+    },
+    {
+      "epoch": 0.021224329370148374,
+      "grad_norm": 0.3702046275138855,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 1.2009,
+      "step": 378
+    },
+    {
+      "epoch": 0.02128047838964612,
+      "grad_norm": 0.36723971366882324,
+      "learning_rate": 7.136959534174592e-07,
+      "loss": 1.2211,
+      "step": 379
+    },
+    {
+      "epoch": 0.02133662740914387,
+      "grad_norm": 0.37710997462272644,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 1.2392,
+      "step": 380
+    },
+    {
+      "epoch": 0.021392776428641614,
+      "grad_norm": 0.3700467348098755,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 1.2476,
+      "step": 381
+    },
+    {
+      "epoch": 0.021448925448139363,
+      "grad_norm": 0.3700866997241974,
+      "learning_rate": 5.246800274474439e-07,
+      "loss": 1.2151,
+      "step": 382
+    },
+    {
+      "epoch": 0.02150507446763711,
+      "grad_norm": 0.4157610237598419,
+      "learning_rate": 4.680902408635335e-07,
+      "loss": 1.4174,
+      "step": 383
+    },
+    {
+      "epoch": 0.021561223487134854,
+      "grad_norm": 0.39180654287338257,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 1.3271,
+      "step": 384
+    },
+    {
+      "epoch": 0.021617372506632603,
+      "grad_norm": 0.3855951130390167,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 1.2205,
+      "step": 385
+    },
+    {
+      "epoch": 0.02167352152613035,
+      "grad_norm": 0.3870358169078827,
+      "learning_rate": 3.1761885408435054e-07,
+      "loss": 1.2145,
+      "step": 386
+    },
+    {
+      "epoch": 0.021729670545628098,
+      "grad_norm": 0.40940991044044495,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.3489,
+      "step": 387
+    },
+    {
+      "epoch": 0.021785819565125843,
+      "grad_norm": 0.380584180355072,
+      "learning_rate": 2.334182641175686e-07,
+      "loss": 1.2265,
+      "step": 388
+    },
+    {
+      "epoch": 0.021841968584623592,
+      "grad_norm": 0.41339078545570374,
+      "learning_rate": 1.9616057881935436e-07,
+      "loss": 1.2366,
+      "step": 389
+    },
+    {
+      "epoch": 0.021898117604121338,
+      "grad_norm": 0.39812958240509033,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 1.2635,
+      "step": 390
+    },
+    {
+      "epoch": 0.021954266623619084,
+      "grad_norm": 0.42198699712753296,
+      "learning_rate": 1.3134251542544774e-07,
+      "loss": 1.2593,
+      "step": 391
+    },
+    {
+      "epoch": 0.022010415643116833,
+      "grad_norm": 0.4326508641242981,
+      "learning_rate": 1.0378634328099269e-07,
+      "loss": 1.1585,
+      "step": 392
+    },
+    {
+      "epoch": 0.022066564662614578,
+      "grad_norm": 0.4443672001361847,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 1.184,
+      "step": 393
+    },
+    {
+      "epoch": 0.022122713682112327,
+      "grad_norm": 0.41310712695121765,
+      "learning_rate": 5.838865838366792e-08,
+      "loss": 1.1833,
+      "step": 394
+    },
+    {
+      "epoch": 0.022178862701610073,
+      "grad_norm": 0.4295123219490051,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 1.2364,
+      "step": 395
+    },
+    {
+      "epoch": 0.02223501172110782,
+      "grad_norm": 0.45007407665252686,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 1.226,
+      "step": 396
+    },
+    {
+      "epoch": 0.022291160740605567,
+      "grad_norm": 0.48439890146255493,
+      "learning_rate": 1.4599295990352924e-08,
+      "loss": 1.1626,
+      "step": 397
+    },
+    {
+      "epoch": 0.022347309760103313,
+      "grad_norm": 0.4942685067653656,
+      "learning_rate": 6.488751431266149e-09,
+      "loss": 1.1715,
+      "step": 398
+    },
+    {
+      "epoch": 0.022403458779601062,
+      "grad_norm": 0.45637187361717224,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 1.1143,
+      "step": 399
+    },
+    {
+      "epoch": 0.022459607799098807,
+      "grad_norm": 0.6239014267921448,
+      "learning_rate": 0.0,
+      "loss": 0.9608,
+      "step": 400
+    },
+    {
+      "epoch": 0.022459607799098807,
+      "eval_loss": 1.271822214126587,
+      "eval_runtime": 498.9496,
+      "eval_samples_per_second": 60.116,
+      "eval_steps_per_second": 15.03,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.7475518160896e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null