Training in progress, step 552, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +368 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a5302ed989b61bbf3c5a89d0ce9797280641e435ec2e9f80f205c1902119adb
 size 144805440

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f5adb3a5e16f935dab431d5c7fc988b0cb1691a418940636f10652d62b82db7
 size 144805440

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2280588a779f8600beb802cf022367cb692da9b34ac083f171a98b0ae124ae64
 size 74292308

 version https://git-lfs.github.com/spec/v1
+oid sha256:00a072bec46c4c9546123646ed3daf91f92526592704bf014cc7ef83d90b4303
 size 74292308

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79d41988296ff7641dd7197ea779bba77e30e921990ba55a18499232a9f57fb0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e1aa12142861e36681d375c8477ba6f862e4a421988518266fc08b6e26330f9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dc8764f26b1f60541ddcf4504ff4cf226063bff9c4b473f4392b732534a0b84
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf3863c8948e41c40f37017b57744f08214b2c38da69d49fe98001649774bc48
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.22141695022583,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.06356521393667315,
   "eval_steps": 100,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3555,6 +3555,370 @@
       "eval_samples_per_second": 3.977,
       "eval_steps_per_second": 0.994,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3578,12 +3942,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.182000113647616e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.22141695022583,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.07017599618608716,
   "eval_steps": 100,
+  "global_step": 552,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.977,
       "eval_steps_per_second": 0.994,
       "step": 500
+    },
+    {
+      "epoch": 0.0636923443645465,
+      "grad_norm": 0.20111538469791412,
+      "learning_rate": 4.337571235498628e-06,
+      "loss": 1.2477,
+      "step": 501
+    },
+    {
+      "epoch": 0.06381947479241985,
+      "grad_norm": 0.19941484928131104,
+      "learning_rate": 4.170318841070708e-06,
+      "loss": 1.2335,
+      "step": 502
+    },
+    {
+      "epoch": 0.06394660522029319,
+      "grad_norm": 0.19625377655029297,
+      "learning_rate": 4.00628603241544e-06,
+      "loss": 1.2302,
+      "step": 503
+    },
+    {
+      "epoch": 0.06407373564816654,
+      "grad_norm": 0.2074848711490631,
+      "learning_rate": 3.845478320536178e-06,
+      "loss": 1.22,
+      "step": 504
+    },
+    {
+      "epoch": 0.06420086607603989,
+      "grad_norm": 0.19643086194992065,
+      "learning_rate": 3.687901108082892e-06,
+      "loss": 1.2234,
+      "step": 505
+    },
+    {
+      "epoch": 0.06432799650391323,
+      "grad_norm": 0.19488604366779327,
+      "learning_rate": 3.53355968917054e-06,
+      "loss": 1.2088,
+      "step": 506
+    },
+    {
+      "epoch": 0.06445512693178658,
+      "grad_norm": 0.20913287997245789,
+      "learning_rate": 3.3824592492013085e-06,
+      "loss": 1.1734,
+      "step": 507
+    },
+    {
+      "epoch": 0.06458225735965993,
+      "grad_norm": 0.19445528090000153,
+      "learning_rate": 3.2346048646903494e-06,
+      "loss": 1.2014,
+      "step": 508
+    },
+    {
+      "epoch": 0.06470938778753327,
+      "grad_norm": 0.19821353256702423,
+      "learning_rate": 3.0900015030951744e-06,
+      "loss": 1.2446,
+      "step": 509
+    },
+    {
+      "epoch": 0.06483651821540662,
+      "grad_norm": 0.20283670723438263,
+      "learning_rate": 2.9486540226488557e-06,
+      "loss": 1.1858,
+      "step": 510
+    },
+    {
+      "epoch": 0.06496364864327997,
+      "grad_norm": 0.20396962761878967,
+      "learning_rate": 2.8105671721967875e-06,
+      "loss": 1.2166,
+      "step": 511
+    },
+    {
+      "epoch": 0.0650907790711533,
+      "grad_norm": 0.2022467404603958,
+      "learning_rate": 2.6757455910370488e-06,
+      "loss": 1.1738,
+      "step": 512
+    },
+    {
+      "epoch": 0.06521790949902666,
+      "grad_norm": 0.1929662674665451,
+      "learning_rate": 2.5441938087646612e-06,
+      "loss": 1.205,
+      "step": 513
+    },
+    {
+      "epoch": 0.06534503992690001,
+      "grad_norm": 0.19997857511043549,
+      "learning_rate": 2.4159162451193097e-06,
+      "loss": 1.2103,
+      "step": 514
+    },
+    {
+      "epoch": 0.06547217035477335,
+      "grad_norm": 0.1969158947467804,
+      "learning_rate": 2.290917209836918e-06,
+      "loss": 1.1912,
+      "step": 515
+    },
+    {
+      "epoch": 0.0655993007826467,
+      "grad_norm": 0.19717784225940704,
+      "learning_rate": 2.1692009025048422e-06,
+      "loss": 1.2374,
+      "step": 516
+    },
+    {
+      "epoch": 0.06572643121052005,
+      "grad_norm": 0.20149968564510345,
+      "learning_rate": 2.0507714124207157e-06,
+      "loss": 1.1857,
+      "step": 517
+    },
+    {
+      "epoch": 0.06585356163839338,
+      "grad_norm": 0.19946229457855225,
+      "learning_rate": 1.9356327184551714e-06,
+      "loss": 1.1947,
+      "step": 518
+    },
+    {
+      "epoch": 0.06598069206626674,
+      "grad_norm": 0.19773566722869873,
+      "learning_rate": 1.8237886889180489e-06,
+      "loss": 1.2825,
+      "step": 519
+    },
+    {
+      "epoch": 0.06610782249414009,
+      "grad_norm": 0.20150107145309448,
+      "learning_rate": 1.7152430814285303e-06,
+      "loss": 1.2352,
+      "step": 520
+    },
+    {
+      "epoch": 0.06623495292201342,
+      "grad_norm": 0.20660698413848877,
+      "learning_rate": 1.6099995427888315e-06,
+      "loss": 1.2127,
+      "step": 521
+    },
+    {
+      "epoch": 0.06636208334988677,
+      "grad_norm": 0.19696985185146332,
+      "learning_rate": 1.5080616088616884e-06,
+      "loss": 1.2197,
+      "step": 522
+    },
+    {
+      "epoch": 0.06648921377776013,
+      "grad_norm": 0.20150014758110046,
+      "learning_rate": 1.4094327044515853e-06,
+      "loss": 1.2534,
+      "step": 523
+    },
+    {
+      "epoch": 0.06661634420563346,
+      "grad_norm": 0.1950562745332718,
+      "learning_rate": 1.3141161431896808e-06,
+      "loss": 1.165,
+      "step": 524
+    },
+    {
+      "epoch": 0.06674347463350681,
+      "grad_norm": 0.20011726021766663,
+      "learning_rate": 1.222115127422485e-06,
+      "loss": 1.2179,
+      "step": 525
+    },
+    {
+      "epoch": 0.06687060506138016,
+      "grad_norm": 0.20083405077457428,
+      "learning_rate": 1.1334327481042573e-06,
+      "loss": 1.305,
+      "step": 526
+    },
+    {
+      "epoch": 0.0669977354892535,
+      "grad_norm": 0.20200292766094208,
+      "learning_rate": 1.0480719846931774e-06,
+      "loss": 1.2263,
+      "step": 527
+    },
+    {
+      "epoch": 0.06712486591712685,
+      "grad_norm": 0.20572660863399506,
+      "learning_rate": 9.660357050512158e-07,
+      "loss": 1.2029,
+      "step": 528
+    },
+    {
+      "epoch": 0.0672519963450002,
+      "grad_norm": 0.20432178676128387,
+      "learning_rate": 8.873266653478208e-07,
+      "loss": 1.2703,
+      "step": 529
+    },
+    {
+      "epoch": 0.06737912677287354,
+      "grad_norm": 0.1951807290315628,
+      "learning_rate": 8.119475099673036e-07,
+      "loss": 1.2298,
+      "step": 530
+    },
+    {
+      "epoch": 0.06750625720074689,
+      "grad_norm": 0.2002389281988144,
+      "learning_rate": 7.399007714199658e-07,
+      "loss": 1.2342,
+      "step": 531
+    },
+    {
+      "epoch": 0.06763338762862024,
+      "grad_norm": 0.19941021502017975,
+      "learning_rate": 6.711888702570556e-07,
+      "loss": 1.146,
+      "step": 532
+    },
+    {
+      "epoch": 0.06776051805649358,
+      "grad_norm": 0.19345982372760773,
+      "learning_rate": 6.058141149894336e-07,
+      "loss": 1.1954,
+      "step": 533
+    },
+    {
+      "epoch": 0.06788764848436693,
+      "grad_norm": 0.1961802840232849,
+      "learning_rate": 5.437787020100115e-07,
+      "loss": 1.2165,
+      "step": 534
+    },
+    {
+      "epoch": 0.06801477891224028,
+      "grad_norm": 0.20169439911842346,
+      "learning_rate": 4.850847155199567e-07,
+      "loss": 1.2445,
+      "step": 535
+    },
+    {
+      "epoch": 0.06814190934011362,
+      "grad_norm": 0.19823016226291656,
+      "learning_rate": 4.297341274586475e-07,
+      "loss": 1.2371,
+      "step": 536
+    },
+    {
+      "epoch": 0.06826903976798697,
+      "grad_norm": 0.2043391764163971,
+      "learning_rate": 3.777287974374932e-07,
+      "loss": 1.2942,
+      "step": 537
+    },
+    {
+      "epoch": 0.06839617019586032,
+      "grad_norm": 0.20149071514606476,
+      "learning_rate": 3.290704726773619e-07,
+      "loss": 1.1842,
+      "step": 538
+    },
+    {
+      "epoch": 0.06852330062373366,
+      "grad_norm": 0.20085620880126953,
+      "learning_rate": 2.837607879499604e-07,
+      "loss": 1.1982,
+      "step": 539
+    },
+    {
+      "epoch": 0.06865043105160701,
+      "grad_norm": 0.2070370465517044,
+      "learning_rate": 2.418012655228452e-07,
+      "loss": 1.211,
+      "step": 540
+    },
+    {
+      "epoch": 0.06877756147948036,
+      "grad_norm": 0.2030269056558609,
+      "learning_rate": 2.0319331510835205e-07,
+      "loss": 1.2534,
+      "step": 541
+    },
+    {
+      "epoch": 0.0689046919073537,
+      "grad_norm": 0.1966077983379364,
+      "learning_rate": 1.6793823381614505e-07,
+      "loss": 1.1683,
+      "step": 542
+    },
+    {
+      "epoch": 0.06903182233522705,
+      "grad_norm": 0.205659419298172,
+      "learning_rate": 1.3603720610972925e-07,
+      "loss": 1.141,
+      "step": 543
+    },
+    {
+      "epoch": 0.0691589527631004,
+      "grad_norm": 0.2047136127948761,
+      "learning_rate": 1.0749130376659366e-07,
+      "loss": 1.2415,
+      "step": 544
+    },
+    {
+      "epoch": 0.06928608319097374,
+      "grad_norm": 0.2038879543542862,
+      "learning_rate": 8.230148584219554e-08,
+      "loss": 1.2148,
+      "step": 545
+    },
+    {
+      "epoch": 0.06941321361884709,
+      "grad_norm": 0.19984766840934753,
+      "learning_rate": 6.046859863781951e-08,
+      "loss": 1.1954,
+      "step": 546
+    },
+    {
+      "epoch": 0.06954034404672044,
+      "grad_norm": 0.2025536447763443,
+      "learning_rate": 4.199337567203365e-08,
+      "loss": 1.2076,
+      "step": 547
+    },
+    {
+      "epoch": 0.06966747447459377,
+      "grad_norm": 0.2022514045238495,
+      "learning_rate": 2.6876437656153665e-08,
+      "loss": 1.2369,
+      "step": 548
+    },
+    {
+      "epoch": 0.06979460490246713,
+      "grad_norm": 0.21228355169296265,
+      "learning_rate": 1.5118292473292885e-08,
+      "loss": 1.2304,
+      "step": 549
+    },
+    {
+      "epoch": 0.06992173533034048,
+      "grad_norm": 0.20524141192436218,
+      "learning_rate": 6.719335161364804e-09,
+      "loss": 1.2158,
+      "step": 550
+    },
+    {
+      "epoch": 0.07004886575821381,
+      "grad_norm": 0.18994790315628052,
+      "learning_rate": 1.6798478997825939e-09,
+      "loss": 1.1945,
+      "step": 551
+    },
+    {
+      "epoch": 0.07017599618608716,
+      "grad_norm": 0.20824959874153137,
+      "learning_rate": 0.0,
+      "loss": 1.241,
+      "step": 552
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.720928125466968e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null