Training in progress, step 2652, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +368 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60a8247fd6ba1f9fe0dbc2ab7765133457470ca6d3a44c71bf94f3d4c10f9e4b
 size 58680

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf1889ef54561b0d470d85469e087b94d1371deb5fb4f4a1453244b0fbaf7b20
 size 58680

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba646ce7e6e999342a8ea23619ed724cecef86f809b7b64b8e29bc9e76ab814c
 size 127270

 version https://git-lfs.github.com/spec/v1
+oid sha256:28ed220a640223cfeb97a4344a0b768d38922625171661b1587c8b8e38e97fdc
 size 127270

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da464f1ab8886439566680dd97216fa326d723519bedac0470d5e0944caeac13
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:baff5d24dc0ac30e308dd3f1756ee34ebed09385f83a67c74ff17f238a33f7e6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c97ae609b712bd37f007cb4326617d16b954d56d87dcc6a35ec5b0ba67ade88b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cac0cec479cc76a5d9126abb2015be2132cd8378ba61d1add8aa7d534c79d05
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 10.27136516571045,
   "best_model_checkpoint": "miner_id_24/checkpoint-2600",
-  "epoch": 1.7828434313137373,
   "eval_steps": 100,
-  "global_step": 2600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18423,6 +18423,370 @@
       "eval_samples_per_second": 336.607,
       "eval_steps_per_second": 84.282,
       "step": 2600
     }
   ],
   "logging_steps": 1,
@@ -18446,12 +18810,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1074706513920000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 10.27136516571045,
   "best_model_checkpoint": "miner_id_24/checkpoint-2600",
+  "epoch": 1.8184934441683092,
   "eval_steps": 100,
+  "global_step": 2652,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 336.607,
       "eval_steps_per_second": 84.282,
       "step": 2600
+    },
+    {
+      "epoch": 1.7835290084840176,
+      "grad_norm": 0.6825293302536011,
+      "learning_rate": 1.8382787692204917e-07,
+      "loss": 82.2117,
+      "step": 2601
+    },
+    {
+      "epoch": 1.7842145856542977,
+      "grad_norm": 0.8272702097892761,
+      "learning_rate": 1.7669171929617366e-07,
+      "loss": 82.1916,
+      "step": 2602
+    },
+    {
+      "epoch": 1.784900162824578,
+      "grad_norm": 0.5631828904151917,
+      "learning_rate": 1.6969670684205963e-07,
+      "loss": 82.2169,
+      "step": 2603
+    },
+    {
+      "epoch": 1.785585739994858,
+      "grad_norm": 0.6313933730125427,
+      "learning_rate": 1.6284284945029537e-07,
+      "loss": 82.264,
+      "step": 2604
+    },
+    {
+      "epoch": 1.7862713171651383,
+      "grad_norm": 0.8243266940116882,
+      "learning_rate": 1.5613015681189558e-07,
+      "loss": 82.2106,
+      "step": 2605
+    },
+    {
+      "epoch": 1.7869568943354186,
+      "grad_norm": 0.6138453483581543,
+      "learning_rate": 1.4955863841827898e-07,
+      "loss": 82.191,
+      "step": 2606
+    },
+    {
+      "epoch": 1.7876424715056989,
+      "grad_norm": 0.6013806462287903,
+      "learning_rate": 1.431283035612352e-07,
+      "loss": 82.1591,
+      "step": 2607
+    },
+    {
+      "epoch": 1.7883280486759792,
+      "grad_norm": 0.6489602327346802,
+      "learning_rate": 1.3683916133293562e-07,
+      "loss": 82.2066,
+      "step": 2608
+    },
+    {
+      "epoch": 1.7890136258462594,
+      "grad_norm": 0.7366928458213806,
+      "learning_rate": 1.3069122062591144e-07,
+      "loss": 82.1006,
+      "step": 2609
+    },
+    {
+      "epoch": 1.7896992030165395,
+      "grad_norm": 0.6633123159408569,
+      "learning_rate": 1.246844901330535e-07,
+      "loss": 82.1569,
+      "step": 2610
+    },
+    {
+      "epoch": 1.7903847801868198,
+      "grad_norm": 0.6651855111122131,
+      "learning_rate": 1.1881897834757904e-07,
+      "loss": 82.2778,
+      "step": 2611
+    },
+    {
+      "epoch": 1.7910703573570999,
+      "grad_norm": 0.6125953197479248,
+      "learning_rate": 1.130946935630095e-07,
+      "loss": 82.2915,
+      "step": 2612
+    },
+    {
+      "epoch": 1.7917559345273801,
+      "grad_norm": 0.6645113825798035,
+      "learning_rate": 1.0751164387322599e-07,
+      "loss": 82.2116,
+      "step": 2613
+    },
+    {
+      "epoch": 1.7924415116976604,
+      "grad_norm": 0.6014000177383423,
+      "learning_rate": 1.0206983717235829e-07,
+      "loss": 82.174,
+      "step": 2614
+    },
+    {
+      "epoch": 1.7931270888679407,
+      "grad_norm": 0.6267678737640381,
+      "learning_rate": 9.676928115486261e-08,
+      "loss": 82.2418,
+      "step": 2615
+    },
+    {
+      "epoch": 1.793812666038221,
+      "grad_norm": 0.6758940815925598,
+      "learning_rate": 9.1609983315466e-08,
+      "loss": 82.2699,
+      "step": 2616
+    },
+    {
+      "epoch": 1.7944982432085013,
+      "grad_norm": 0.6676249504089355,
+      "learning_rate": 8.659195094913308e-08,
+      "loss": 82.1648,
+      "step": 2617
+    },
+    {
+      "epoch": 1.7951838203787815,
+      "grad_norm": 0.5303319692611694,
+      "learning_rate": 8.171519115113268e-08,
+      "loss": 82.1988,
+      "step": 2618
+    },
+    {
+      "epoch": 1.7958693975490616,
+      "grad_norm": 0.9017250537872314,
+      "learning_rate": 7.6979710816949e-08,
+      "loss": 82.2253,
+      "step": 2619
+    },
+    {
+      "epoch": 1.796554974719342,
+      "grad_norm": 0.593103289604187,
+      "learning_rate": 7.238551664232596e-08,
+      "loss": 82.2429,
+      "step": 2620
+    },
+    {
+      "epoch": 1.797240551889622,
+      "grad_norm": 0.859890341758728,
+      "learning_rate": 6.79326151232118e-08,
+      "loss": 82.1984,
+      "step": 2621
+    },
+    {
+      "epoch": 1.7979261290599022,
+      "grad_norm": 0.6514622569084167,
+      "learning_rate": 6.362101255579233e-08,
+      "loss": 82.1653,
+      "step": 2622
+    },
+    {
+      "epoch": 1.7986117062301825,
+      "grad_norm": 0.6877514719963074,
+      "learning_rate": 5.945071503645761e-08,
+      "loss": 82.243,
+      "step": 2623
+    },
+    {
+      "epoch": 1.7992972834004628,
+      "grad_norm": 0.6594268083572388,
+      "learning_rate": 5.542172846180194e-08,
+      "loss": 82.2962,
+      "step": 2624
+    },
+    {
+      "epoch": 1.799982860570743,
+      "grad_norm": 0.8793612718582153,
+      "learning_rate": 5.1534058528612817e-08,
+      "loss": 82.1784,
+      "step": 2625
+    },
+    {
+      "epoch": 1.8006684377410234,
+      "grad_norm": 0.6758993864059448,
+      "learning_rate": 4.7787710733859794e-08,
+      "loss": 82.1447,
+      "step": 2626
+    },
+    {
+      "epoch": 1.8013540149113034,
+      "grad_norm": 0.7371706962585449,
+      "learning_rate": 4.418269037468337e-08,
+      "loss": 82.266,
+      "step": 2627
+    },
+    {
+      "epoch": 1.8020395920815837,
+      "grad_norm": 0.7626631259918213,
+      "learning_rate": 4.071900254841721e-08,
+      "loss": 82.238,
+      "step": 2628
+    },
+    {
+      "epoch": 1.8027251692518638,
+      "grad_norm": 0.7885541915893555,
+      "learning_rate": 3.739665215253263e-08,
+      "loss": 82.2318,
+      "step": 2629
+    },
+    {
+      "epoch": 1.803410746422144,
+      "grad_norm": 0.5564078092575073,
+      "learning_rate": 3.4215643884660807e-08,
+      "loss": 82.216,
+      "step": 2630
+    },
+    {
+      "epoch": 1.8040963235924243,
+      "grad_norm": 0.7523298859596252,
+      "learning_rate": 3.1175982242603876e-08,
+      "loss": 82.1603,
+      "step": 2631
+    },
+    {
+      "epoch": 1.8047819007627046,
+      "grad_norm": 0.7048127055168152,
+      "learning_rate": 2.8277671524290506e-08,
+      "loss": 82.223,
+      "step": 2632
+    },
+    {
+      "epoch": 1.805467477932985,
+      "grad_norm": 0.6412562727928162,
+      "learning_rate": 2.552071582777593e-08,
+      "loss": 82.2456,
+      "step": 2633
+    },
+    {
+      "epoch": 1.8061530551032652,
+      "grad_norm": 0.610815703868866,
+      "learning_rate": 2.2905119051264135e-08,
+      "loss": 82.2092,
+      "step": 2634
+    },
+    {
+      "epoch": 1.8068386322735452,
+      "grad_norm": 0.7834846377372742,
+      "learning_rate": 2.0430884893074544e-08,
+      "loss": 82.243,
+      "step": 2635
+    },
+    {
+      "epoch": 1.8075242094438255,
+      "grad_norm": 0.6227961778640747,
+      "learning_rate": 1.809801685165313e-08,
+      "loss": 82.3004,
+      "step": 2636
+    },
+    {
+      "epoch": 1.8082097866141056,
+      "grad_norm": 0.7231442928314209,
+      "learning_rate": 1.590651822556133e-08,
+      "loss": 82.1649,
+      "step": 2637
+    },
+    {
+      "epoch": 1.8088953637843859,
+      "grad_norm": 0.5944806933403015,
+      "learning_rate": 1.38563921134649e-08,
+      "loss": 82.1783,
+      "step": 2638
+    },
+    {
+      "epoch": 1.8095809409546662,
+      "grad_norm": 0.7193268537521362,
+      "learning_rate": 1.1947641414145062e-08,
+      "loss": 82.2384,
+      "step": 2639
+    },
+    {
+      "epoch": 1.8102665181249464,
+      "grad_norm": 0.7523531317710876,
+      "learning_rate": 1.0180268826476268e-08,
+      "loss": 82.2451,
+      "step": 2640
+    },
+    {
+      "epoch": 1.8109520952952267,
+      "grad_norm": 0.6392510533332825,
+      "learning_rate": 8.554276849426224e-09,
+      "loss": 82.2968,
+      "step": 2641
+    },
+    {
+      "epoch": 1.811637672465507,
+      "grad_norm": 0.6115372776985168,
+      "learning_rate": 7.069667782089173e-09,
+      "loss": 82.1473,
+      "step": 2642
+    },
+    {
+      "epoch": 1.8123232496357873,
+      "grad_norm": 0.6424670815467834,
+      "learning_rate": 5.726443723608199e-09,
+      "loss": 82.2287,
+      "step": 2643
+    },
+    {
+      "epoch": 1.8130088268060673,
+      "grad_norm": 0.6922582387924194,
+      "learning_rate": 4.5246065732307276e-09,
+      "loss": 82.1094,
+      "step": 2644
+    },
+    {
+      "epoch": 1.8136944039763476,
+      "grad_norm": 0.7161763906478882,
+      "learning_rate": 3.4641580303196307e-09,
+      "loss": 82.2028,
+      "step": 2645
+    },
+    {
+      "epoch": 1.8143799811466277,
+      "grad_norm": 0.6677362322807312,
+      "learning_rate": 2.545099594275513e-09,
+      "loss": 82.1961,
+      "step": 2646
+    },
+    {
+      "epoch": 1.815065558316908,
+      "grad_norm": 0.6319419145584106,
+      "learning_rate": 1.7674325646144241e-09,
+      "loss": 82.2142,
+      "step": 2647
+    },
+    {
+      "epoch": 1.8157511354871883,
+      "grad_norm": 0.7084919214248657,
+      "learning_rate": 1.1311580409012478e-09,
+      "loss": 82.1892,
+      "step": 2648
+    },
+    {
+      "epoch": 1.8164367126574685,
+      "grad_norm": 0.6398366689682007,
+      "learning_rate": 6.362769228163146e-10,
+      "loss": 82.2728,
+      "step": 2649
+    },
+    {
+      "epoch": 1.8171222898277488,
+      "grad_norm": 0.7294822335243225,
+      "learning_rate": 2.827899100776854e-10,
+      "loss": 82.3094,
+      "step": 2650
+    },
+    {
+      "epoch": 1.817807866998029,
+      "grad_norm": 0.7141733169555664,
+      "learning_rate": 7.069750250776608e-11,
+      "loss": 82.2557,
+      "step": 2651
+    },
+    {
+      "epoch": 1.8184934441683092,
+      "grad_norm": 0.6061791181564331,
+      "learning_rate": 0.0,
+      "loss": 82.2751,
+      "step": 2652
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1096200644198400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null