Training in progress, step 155, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +389 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0416a67038d04a975850f5728359c9aba648aa6350e20d7acbfd3fb2971d208
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:26657c3db366efc7fa6d92372aa3219664b8571ca133e00feda4d1f205b15d90
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c99b267690272d6e4f628fa5599135199e250d82fd58007b6ef13455963166f8
 size 111142

 version https://git-lfs.github.com/spec/v1
+oid sha256:88fd526672df865f81c4176a06dc3da1929c1ce8317f76c85d58e8c3304b2eb9
 size 111142

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c661340682e18e4f90539ed362e83ed15d19b9a2a7967a40c1f4912323266dc0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:14a3afbab8271bee510684da18634a513067dc3f106f7cf4fa953ce19ce526db
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:598909828fb132bf741be4aea9ee4f44f8aebeb16d890d68d870b1973643d2d8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fad1344d0d68cec31f4379da8eb730afca29401bda64dafa2bff69b67bba283b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 10.353182792663574,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.22139200221392002,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,391 @@
       "eval_samples_per_second": 328.204,
       "eval_steps_per_second": 82.159,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -746,12 +1131,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 41841957273600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 10.353182792663574,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.34315760343157603,
   "eval_steps": 100,
+  "global_step": 155,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 328.204,
       "eval_steps_per_second": 82.159,
       "step": 100
+    },
+    {
+      "epoch": 0.2236059222360592,
+      "grad_norm": 0.03327897563576698,
+      "learning_rate": 6.0982293673944544e-05,
+      "loss": 10.3556,
+      "step": 101
+    },
+    {
+      "epoch": 0.22581984225819843,
+      "grad_norm": 0.03691767901182175,
+      "learning_rate": 5.899671794785839e-05,
+      "loss": 10.3553,
+      "step": 102
+    },
+    {
+      "epoch": 0.22803376228033762,
+      "grad_norm": 0.030172044411301613,
+      "learning_rate": 5.703038932486484e-05,
+      "loss": 10.3542,
+      "step": 103
+    },
+    {
+      "epoch": 0.23024768230247683,
+      "grad_norm": 0.021300997585058212,
+      "learning_rate": 5.5084230807412126e-05,
+      "loss": 10.3543,
+      "step": 104
+    },
+    {
+      "epoch": 0.23246160232461602,
+      "grad_norm": 0.03279775753617287,
+      "learning_rate": 5.3159155930021e-05,
+      "loss": 10.3539,
+      "step": 105
+    },
+    {
+      "epoch": 0.23467552234675523,
+      "grad_norm": 0.02924364060163498,
+      "learning_rate": 5.12560683304681e-05,
+      "loss": 10.3543,
+      "step": 106
+    },
+    {
+      "epoch": 0.23688944236889442,
+      "grad_norm": 0.03262259438633919,
+      "learning_rate": 4.9375861325614606e-05,
+      "loss": 10.355,
+      "step": 107
+    },
+    {
+      "epoch": 0.23910336239103364,
+      "grad_norm": 0.034073278307914734,
+      "learning_rate": 4.751941749207995e-05,
+      "loss": 10.357,
+      "step": 108
+    },
+    {
+      "epoch": 0.24131728241317282,
+      "grad_norm": 0.03489963710308075,
+      "learning_rate": 4.5687608251956714e-05,
+      "loss": 10.355,
+      "step": 109
+    },
+    {
+      "epoch": 0.243531202435312,
+      "grad_norm": 0.02581014297902584,
+      "learning_rate": 4.388129346376178e-05,
+      "loss": 10.3537,
+      "step": 110
+    },
+    {
+      "epoch": 0.24574512245745123,
+      "grad_norm": 0.023335812613368034,
+      "learning_rate": 4.210132101881516e-05,
+      "loss": 10.3553,
+      "step": 111
+    },
+    {
+      "epoch": 0.2479590424795904,
+      "grad_norm": 0.03612133115530014,
+      "learning_rate": 4.034852644323661e-05,
+      "loss": 10.3534,
+      "step": 112
+    },
+    {
+      "epoch": 0.2501729625017296,
+      "grad_norm": 0.02821163646876812,
+      "learning_rate": 3.862373250574626e-05,
+      "loss": 10.3556,
+      "step": 113
+    },
+    {
+      "epoch": 0.2523868825238688,
+      "grad_norm": 0.0300295390188694,
+      "learning_rate": 3.6927748831453836e-05,
+      "loss": 10.3551,
+      "step": 114
+    },
+    {
+      "epoch": 0.25460080254600803,
+      "grad_norm": 0.025033898651599884,
+      "learning_rate": 3.5261371521817244e-05,
+      "loss": 10.3525,
+      "step": 115
+    },
+    {
+      "epoch": 0.25681472256814725,
+      "grad_norm": 0.02417595498263836,
+      "learning_rate": 3.3625382780949574e-05,
+      "loss": 10.3542,
+      "step": 116
+    },
+    {
+      "epoch": 0.2590286425902864,
+      "grad_norm": 0.02323267050087452,
+      "learning_rate": 3.202055054844921e-05,
+      "loss": 10.3521,
+      "step": 117
+    },
+    {
+      "epoch": 0.2612425626124256,
+      "grad_norm": 0.026705941185355186,
+      "learning_rate": 3.0447628138926156e-05,
+      "loss": 10.3536,
+      "step": 118
+    },
+    {
+      "epoch": 0.26345648263456484,
+      "grad_norm": 0.033880215138196945,
+      "learning_rate": 2.890735388839295e-05,
+      "loss": 10.3554,
+      "step": 119
+    },
+    {
+      "epoch": 0.26567040265670405,
+      "grad_norm": 0.029678767547011375,
+      "learning_rate": 2.7400450807686938e-05,
+      "loss": 10.353,
+      "step": 120
+    },
+    {
+      "epoch": 0.2678843226788432,
+      "grad_norm": 0.02781762182712555,
+      "learning_rate": 2.59276262430861e-05,
+      "loss": 10.3553,
+      "step": 121
+    },
+    {
+      "epoch": 0.2700982427009824,
+      "grad_norm": 0.027942579239606857,
+      "learning_rate": 2.4489571544277945e-05,
+      "loss": 10.3556,
+      "step": 122
+    },
+    {
+      "epoch": 0.27231216272312164,
+      "grad_norm": 0.02813226915895939,
+      "learning_rate": 2.308696173983711e-05,
+      "loss": 10.3528,
+      "step": 123
+    },
+    {
+      "epoch": 0.2745260827452608,
+      "grad_norm": 0.030037103220820427,
+      "learning_rate": 2.1720455220364444e-05,
+      "loss": 10.353,
+      "step": 124
+    },
+    {
+      "epoch": 0.2767400027674,
+      "grad_norm": 0.030153660103678703,
+      "learning_rate": 2.0390693429435627e-05,
+      "loss": 10.3552,
+      "step": 125
+    },
+    {
+      "epoch": 0.27895392278953923,
+      "grad_norm": 0.037454936653375626,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 10.3531,
+      "step": 126
+    },
+    {
+      "epoch": 0.28116784281167845,
+      "grad_norm": 0.029572051018476486,
+      "learning_rate": 1.784388327390687e-05,
+      "loss": 10.3504,
+      "step": 127
+    },
+    {
+      "epoch": 0.2833817628338176,
+      "grad_norm": 0.027182403951883316,
+      "learning_rate": 1.6628030392087e-05,
+      "loss": 10.3504,
+      "step": 128
+    },
+    {
+      "epoch": 0.2855956828559568,
+      "grad_norm": 0.03518354520201683,
+      "learning_rate": 1.5451312643206827e-05,
+      "loss": 10.3536,
+      "step": 129
+    },
+    {
+      "epoch": 0.28780960287809604,
+      "grad_norm": 0.024024929851293564,
+      "learning_rate": 1.4314282383241096e-05,
+      "loss": 10.3533,
+      "step": 130
+    },
+    {
+      "epoch": 0.29002352290023525,
+      "grad_norm": 0.020221339538693428,
+      "learning_rate": 1.3217473338699859e-05,
+      "loss": 10.3521,
+      "step": 131
+    },
+    {
+      "epoch": 0.2922374429223744,
+      "grad_norm": 0.02575266920030117,
+      "learning_rate": 1.2161400356095375e-05,
+      "loss": 10.3528,
+      "step": 132
+    },
+    {
+      "epoch": 0.29445136294451363,
+      "grad_norm": 0.019320376217365265,
+      "learning_rate": 1.1146559160270875e-05,
+      "loss": 10.3529,
+      "step": 133
+    },
+    {
+      "epoch": 0.29666528296665284,
+      "grad_norm": 0.022721335291862488,
+      "learning_rate": 1.0173426121705576e-05,
+      "loss": 10.3534,
+      "step": 134
+    },
+    {
+      "epoch": 0.298879202988792,
+      "grad_norm": 0.030306054279208183,
+      "learning_rate": 9.242458032904311e-06,
+      "loss": 10.3529,
+      "step": 135
+    },
+    {
+      "epoch": 0.3010931230109312,
+      "grad_norm": 0.03088550828397274,
+      "learning_rate": 8.354091893977401e-06,
+      "loss": 10.3542,
+      "step": 136
+    },
+    {
+      "epoch": 0.30330704303307043,
+      "grad_norm": 0.02875097282230854,
+      "learning_rate": 7.508744707511117e-06,
+      "loss": 10.3556,
+      "step": 137
+    },
+    {
+      "epoch": 0.30552096305520965,
+      "grad_norm": 0.022224275395274162,
+      "learning_rate": 6.70681328282492e-06,
+      "loss": 10.3536,
+      "step": 138
+    },
+    {
+      "epoch": 0.3077348830773488,
+      "grad_norm": 0.028994860127568245,
+      "learning_rate": 5.948674049707603e-06,
+      "loss": 10.3531,
+      "step": 139
+    },
+    {
+      "epoch": 0.309948803099488,
+      "grad_norm": 0.03301481530070305,
+      "learning_rate": 5.2346828817197655e-06,
+      "loss": 10.3539,
+      "step": 140
+    },
+    {
+      "epoch": 0.31216272312162724,
+      "grad_norm": 0.031127754598855972,
+      "learning_rate": 4.565174929145188e-06,
+      "loss": 10.3541,
+      "step": 141
+    },
+    {
+      "epoch": 0.31437664314376645,
+      "grad_norm": 0.03324393928050995,
+      "learning_rate": 3.940464461670135e-06,
+      "loss": 10.3558,
+      "step": 142
+    },
+    {
+      "epoch": 0.3165905631659056,
+      "grad_norm": 0.025115065276622772,
+      "learning_rate": 3.360844720863765e-06,
+      "loss": 10.3528,
+      "step": 143
+    },
+    {
+      "epoch": 0.31880448318804483,
+      "grad_norm": 0.02497878670692444,
+      "learning_rate": 2.826587782529444e-06,
+      "loss": 10.353,
+      "step": 144
+    },
+    {
+      "epoch": 0.32101840321018404,
+      "grad_norm": 0.026006096974015236,
+      "learning_rate": 2.3379444289913342e-06,
+      "loss": 10.3548,
+      "step": 145
+    },
+    {
+      "epoch": 0.32323232323232326,
+      "grad_norm": 0.028647800907492638,
+      "learning_rate": 1.8951440313760837e-06,
+      "loss": 10.355,
+      "step": 146
+    },
+    {
+      "epoch": 0.3254462432544624,
+      "grad_norm": 0.03259943798184395,
+      "learning_rate": 1.4983944419451613e-06,
+      "loss": 10.3541,
+      "step": 147
+    },
+    {
+      "epoch": 0.32766016327660163,
+      "grad_norm": 0.033153582364320755,
+      "learning_rate": 1.1478818965281911e-06,
+      "loss": 10.3529,
+      "step": 148
+    },
+    {
+      "epoch": 0.32987408329874085,
+      "grad_norm": 0.026517199352383614,
+      "learning_rate": 8.437709271030603e-07,
+      "loss": 10.3537,
+      "step": 149
+    },
+    {
+      "epoch": 0.33208800332088,
+      "grad_norm": 0.02658323012292385,
+      "learning_rate": 5.862042845640403e-07,
+      "loss": 10.3527,
+      "step": 150
+    },
+    {
+      "epoch": 0.3343019233430192,
+      "grad_norm": 0.024668825790286064,
+      "learning_rate": 3.7530287171387843e-07,
+      "loss": 10.3539,
+      "step": 151
+    },
+    {
+      "epoch": 0.33651584336515844,
+      "grad_norm": 0.030534988269209862,
+      "learning_rate": 2.1116568651156076e-07,
+      "loss": 10.3545,
+      "step": 152
+    },
+    {
+      "epoch": 0.33872976338729766,
+      "grad_norm": 0.024195190519094467,
+      "learning_rate": 9.386977560232879e-08,
+      "loss": 10.3539,
+      "step": 153
+    },
+    {
+      "epoch": 0.3409436834094368,
+      "grad_norm": 0.027664266526699066,
+      "learning_rate": 2.347019815158724e-08,
+      "loss": 10.3528,
+      "step": 154
+    },
+    {
+      "epoch": 0.34315760343157603,
+      "grad_norm": 0.03868336230516434,
+      "learning_rate": 0.0,
+      "loss": 10.3539,
+      "step": 155
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 64855033774080.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null