End of training

Browse files

Files changed (6) hide show

README.md +1 -0
all_results.json +9 -9
eval_results.json +5 -5
runs/May14_16-20-08_cs-Precision-7960-Tower/events.out.tfevents.1747254687.cs-Precision-7960-Tower.129778.1 +3 -0
train_results.json +4 -4
trainer_state.json +522 -522

README.md CHANGED Viewed

@@ -3,6 +3,7 @@ library_name: transformers
 license: apache-2.0
 base_model: facebook/wav2vec2-base
 tags:
 - generated_from_trainer
 datasets:
 - superb

 license: apache-2.0
 base_model: facebook/wav2vec2-base
 tags:
+- audio-classification
 - generated_from_trainer
 datasets:
 - superb

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
     "epoch": 7.996245306633291,
-    "eval_accuracy": 0.9763165636952045,
-    "eval_loss": 0.40624934434890747,
-    "eval_runtime": 5.5217,
-    "eval_samples_per_second": 1231.15,
-    "eval_steps_per_second": 38.575,
     "total_flos": 3.777723239743488e+18,
-    "train_loss": 1.152813568037359,
-    "train_runtime": 637.2674,
-    "train_samples_per_second": 641.414,
-    "train_steps_per_second": 2.498
 }

 {
     "epoch": 7.996245306633291,
+    "eval_accuracy": 0.9773462783171522,
+    "eval_loss": 0.11222562193870544,
+    "eval_runtime": 5.4886,
+    "eval_samples_per_second": 1238.577,
+    "eval_steps_per_second": 38.808,
     "total_flos": 3.777723239743488e+18,
+    "train_loss": 0.6284143277373745,
+    "train_runtime": 641.8193,
+    "train_samples_per_second": 636.865,
+    "train_steps_per_second": 2.48
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 7.996245306633291,
-    "eval_accuracy": 0.9763165636952045,
-    "eval_loss": 0.40624934434890747,
-    "eval_runtime": 5.5217,
-    "eval_samples_per_second": 1231.15,
-    "eval_steps_per_second": 38.575
 }

 {
     "epoch": 7.996245306633291,
+    "eval_accuracy": 0.9773462783171522,
+    "eval_loss": 0.11222562193870544,
+    "eval_runtime": 5.4886,
+    "eval_samples_per_second": 1238.577,
+    "eval_steps_per_second": 38.808
 }

runs/May14_16-20-08_cs-Precision-7960-Tower/events.out.tfevents.1747254687.cs-Precision-7960-Tower.129778.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f459728cd27001d03aa527ac123f8fc1920eb2da2cdfcdafddcfec8ca91308a
+size 411

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 7.996245306633291,
     "total_flos": 3.777723239743488e+18,
-    "train_loss": 1.152813568037359,
-    "train_runtime": 637.2674,
-    "train_samples_per_second": 641.414,
-    "train_steps_per_second": 2.498
 }

 {
     "epoch": 7.996245306633291,
     "total_flos": 3.777723239743488e+18,
+    "train_loss": 0.6284143277373745,
+    "train_runtime": 641.8193,
+    "train_samples_per_second": 636.865,
+    "train_steps_per_second": 2.48
 }

trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.9763165636952045,
   "best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1592",
   "epoch": 7.996245306633291,
   "eval_steps": 500,
@@ -10,1197 +10,1197 @@
   "log_history": [
     {
       "epoch": 0.05006257822277847,
-      "grad_norm": 2.2142410278320312,
-      "learning_rate": 6.25e-07,
-      "loss": 4.0997,
       "step": 10
     },
     {
       "epoch": 0.10012515644555695,
-      "grad_norm": 2.4960668087005615,
-      "learning_rate": 1.25e-06,
-      "loss": 4.1744,
       "step": 20
     },
     {
       "epoch": 0.15018773466833543,
-      "grad_norm": 2.9901962280273438,
-      "learning_rate": 1.8750000000000003e-06,
-      "loss": 4.1686,
       "step": 30
     },
     {
       "epoch": 0.2002503128911139,
-      "grad_norm": 2.9945929050445557,
-      "learning_rate": 2.5e-06,
-      "loss": 4.1114,
       "step": 40
     },
     {
       "epoch": 0.2503128911138924,
-      "grad_norm": 3.4015228748321533,
-      "learning_rate": 3.125e-06,
-      "loss": 4.0183,
       "step": 50
     },
     {
       "epoch": 0.30037546933667086,
-      "grad_norm": 4.165560722351074,
-      "learning_rate": 3.7500000000000005e-06,
-      "loss": 3.875,
       "step": 60
     },
     {
       "epoch": 0.3504380475594493,
-      "grad_norm": 4.998468399047852,
-      "learning_rate": 4.3750000000000005e-06,
-      "loss": 3.6691,
       "step": 70
     },
     {
       "epoch": 0.4005006257822278,
-      "grad_norm": 5.729038238525391,
-      "learning_rate": 5e-06,
-      "loss": 3.3538,
       "step": 80
     },
     {
       "epoch": 0.45056320400500627,
-      "grad_norm": 5.879266738891602,
-      "learning_rate": 5.625e-06,
-      "loss": 2.9866,
       "step": 90
     },
     {
       "epoch": 0.5006257822277848,
-      "grad_norm": 5.562048435211182,
-      "learning_rate": 6.25e-06,
-      "loss": 2.6957,
       "step": 100
     },
     {
       "epoch": 0.5506883604505632,
-      "grad_norm": 5.61751651763916,
-      "learning_rate": 6.875e-06,
-      "loss": 2.4759,
       "step": 110
     },
     {
       "epoch": 0.6007509386733417,
-      "grad_norm": 4.866910457611084,
-      "learning_rate": 7.500000000000001e-06,
-      "loss": 2.358,
       "step": 120
     },
     {
       "epoch": 0.6508135168961201,
-      "grad_norm": 4.472853183746338,
-      "learning_rate": 8.125000000000001e-06,
-      "loss": 2.2576,
       "step": 130
     },
     {
       "epoch": 0.7008760951188986,
-      "grad_norm": 4.833339691162109,
-      "learning_rate": 8.750000000000001e-06,
-      "loss": 2.1076,
       "step": 140
     },
     {
       "epoch": 0.7509386733416771,
-      "grad_norm": 4.425817966461182,
-      "learning_rate": 9.375000000000001e-06,
-      "loss": 2.0808,
       "step": 150
     },
     {
       "epoch": 0.8010012515644556,
-      "grad_norm": 3.747729539871216,
-      "learning_rate": 1e-05,
-      "loss": 2.0467,
       "step": 160
     },
     {
       "epoch": 0.851063829787234,
-      "grad_norm": 3.6987853050231934,
-      "learning_rate": 9.930167597765364e-06,
-      "loss": 1.9358,
       "step": 170
     },
     {
       "epoch": 0.9011264080100125,
-      "grad_norm": 3.0652871131896973,
-      "learning_rate": 9.860335195530727e-06,
-      "loss": 1.904,
       "step": 180
     },
     {
       "epoch": 0.951188986232791,
-      "grad_norm": 2.3470098972320557,
-      "learning_rate": 9.79050279329609e-06,
-      "loss": 1.8801,
       "step": 190
     },
     {
       "epoch": 0.9962453066332916,
-      "eval_accuracy": 0.6209179170344219,
-      "eval_loss": 1.7453958988189697,
-      "eval_runtime": 4.9497,
-      "eval_samples_per_second": 1373.405,
-      "eval_steps_per_second": 43.033,
       "step": 199
     },
     {
       "epoch": 1.0050062578222778,
-      "grad_norm": 2.7552437782287598,
-      "learning_rate": 9.720670391061454e-06,
-      "loss": 1.9461,
       "step": 200
     },
     {
       "epoch": 1.0550688360450564,
-      "grad_norm": 1.5700269937515259,
-      "learning_rate": 9.650837988826817e-06,
-      "loss": 1.7812,
       "step": 210
     },
     {
       "epoch": 1.1051314142678348,
-      "grad_norm": 1.4513113498687744,
-      "learning_rate": 9.581005586592178e-06,
-      "loss": 1.7865,
       "step": 220
     },
     {
       "epoch": 1.1551939924906134,
-      "grad_norm": 1.5353403091430664,
-      "learning_rate": 9.511173184357543e-06,
-      "loss": 1.7024,
       "step": 230
     },
     {
       "epoch": 1.2052565707133918,
-      "grad_norm": 1.4006606340408325,
-      "learning_rate": 9.441340782122905e-06,
-      "loss": 1.7307,
       "step": 240
     },
     {
       "epoch": 1.2553191489361701,
-      "grad_norm": 2.397796630859375,
-      "learning_rate": 9.371508379888268e-06,
-      "loss": 1.7121,
       "step": 250
     },
     {
       "epoch": 1.3053817271589487,
-      "grad_norm": 2.891803503036499,
-      "learning_rate": 9.301675977653633e-06,
-      "loss": 1.7195,
       "step": 260
     },
     {
       "epoch": 1.355444305381727,
-      "grad_norm": 13.237679481506348,
-      "learning_rate": 9.231843575418995e-06,
-      "loss": 1.6628,
       "step": 270
     },
     {
       "epoch": 1.4055068836045057,
-      "grad_norm": 4.088449478149414,
-      "learning_rate": 9.162011173184358e-06,
-      "loss": 1.6614,
       "step": 280
     },
     {
       "epoch": 1.455569461827284,
-      "grad_norm": 4.54097318649292,
-      "learning_rate": 9.092178770949721e-06,
-      "loss": 1.5913,
       "step": 290
     },
     {
       "epoch": 1.5056320400500627,
-      "grad_norm": 2.6582536697387695,
-      "learning_rate": 9.022346368715084e-06,
-      "loss": 1.5883,
       "step": 300
     },
     {
       "epoch": 1.555694618272841,
-      "grad_norm": 5.469503402709961,
-      "learning_rate": 8.952513966480448e-06,
-      "loss": 1.6299,
       "step": 310
     },
     {
       "epoch": 1.6057571964956194,
-      "grad_norm": 2.4134175777435303,
-      "learning_rate": 8.88268156424581e-06,
-      "loss": 1.5386,
       "step": 320
     },
     {
       "epoch": 1.655819774718398,
-      "grad_norm": 5.472087860107422,
-      "learning_rate": 8.812849162011174e-06,
-      "loss": 1.5774,
       "step": 330
     },
     {
       "epoch": 1.7058823529411766,
-      "grad_norm": 3.7888548374176025,
-      "learning_rate": 8.743016759776537e-06,
-      "loss": 1.5314,
       "step": 340
     },
     {
       "epoch": 1.7559449311639548,
-      "grad_norm": 6.527620315551758,
-      "learning_rate": 8.6731843575419e-06,
-      "loss": 1.4678,
       "step": 350
     },
     {
       "epoch": 1.8060075093867334,
-      "grad_norm": 6.796471118927002,
-      "learning_rate": 8.603351955307264e-06,
-      "loss": 1.4769,
       "step": 360
     },
     {
       "epoch": 1.856070087609512,
-      "grad_norm": 4.604396343231201,
-      "learning_rate": 8.533519553072627e-06,
-      "loss": 1.4285,
       "step": 370
     },
     {
       "epoch": 1.9061326658322904,
-      "grad_norm": 5.599514484405518,
-      "learning_rate": 8.463687150837988e-06,
-      "loss": 1.3812,
       "step": 380
     },
     {
       "epoch": 1.9561952440550687,
-      "grad_norm": 13.588865280151367,
-      "learning_rate": 8.393854748603353e-06,
-      "loss": 1.389,
       "step": 390
     },
     {
       "epoch": 1.9962453066332917,
-      "eval_accuracy": 0.6518093556928508,
-      "eval_loss": 1.240417718887329,
-      "eval_runtime": 4.8608,
-      "eval_samples_per_second": 1398.548,
-      "eval_steps_per_second": 43.82,
       "step": 398
     },
     {
       "epoch": 2.0100125156445556,
-      "grad_norm": 5.914628028869629,
-      "learning_rate": 8.324022346368715e-06,
-      "loss": 1.4118,
       "step": 400
     },
     {
       "epoch": 2.0600750938673342,
-      "grad_norm": 9.35991096496582,
-      "learning_rate": 8.254189944134078e-06,
-      "loss": 1.3324,
       "step": 410
     },
     {
       "epoch": 2.110137672090113,
-      "grad_norm": 7.8319220542907715,
-      "learning_rate": 8.184357541899443e-06,
-      "loss": 1.3393,
       "step": 420
     },
     {
       "epoch": 2.160200250312891,
-      "grad_norm": 3.685518264770508,
-      "learning_rate": 8.114525139664805e-06,
-      "loss": 1.3176,
       "step": 430
     },
     {
       "epoch": 2.2102628285356696,
-      "grad_norm": 4.464322566986084,
-      "learning_rate": 8.044692737430168e-06,
-      "loss": 1.2491,
       "step": 440
     },
     {
       "epoch": 2.260325406758448,
-      "grad_norm": 6.714467525482178,
-      "learning_rate": 7.974860335195531e-06,
-      "loss": 1.2923,
       "step": 450
     },
     {
       "epoch": 2.3103879849812268,
-      "grad_norm": 3.9189820289611816,
-      "learning_rate": 7.905027932960894e-06,
-      "loss": 1.2584,
       "step": 460
     },
     {
       "epoch": 2.360450563204005,
-      "grad_norm": 10.34443473815918,
-      "learning_rate": 7.835195530726258e-06,
-      "loss": 1.2372,
       "step": 470
     },
     {
       "epoch": 2.4105131414267835,
-      "grad_norm": 3.698418140411377,
-      "learning_rate": 7.76536312849162e-06,
-      "loss": 1.225,
       "step": 480
     },
     {
       "epoch": 2.460575719649562,
-      "grad_norm": 8.271072387695312,
-      "learning_rate": 7.695530726256984e-06,
-      "loss": 1.1353,
       "step": 490
     },
     {
       "epoch": 2.5106382978723403,
-      "grad_norm": 3.2551236152648926,
-      "learning_rate": 7.625698324022347e-06,
-      "loss": 1.1894,
       "step": 500
     },
     {
       "epoch": 2.560700876095119,
-      "grad_norm": 7.808169364929199,
-      "learning_rate": 7.5558659217877105e-06,
-      "loss": 1.2102,
       "step": 510
     },
     {
       "epoch": 2.6107634543178975,
-      "grad_norm": 10.614561080932617,
-      "learning_rate": 7.486033519553073e-06,
-      "loss": 1.1493,
       "step": 520
     },
     {
       "epoch": 2.660826032540676,
-      "grad_norm": 4.759502410888672,
-      "learning_rate": 7.416201117318437e-06,
-      "loss": 1.1384,
       "step": 530
     },
     {
       "epoch": 2.710888610763454,
-      "grad_norm": 8.49401569366455,
-      "learning_rate": 7.346368715083799e-06,
-      "loss": 1.0993,
       "step": 540
     },
     {
       "epoch": 2.760951188986233,
-      "grad_norm": 4.307897567749023,
-      "learning_rate": 7.2765363128491625e-06,
-      "loss": 1.1151,
       "step": 550
     },
     {
       "epoch": 2.8110137672090114,
-      "grad_norm": 4.177582740783691,
-      "learning_rate": 7.206703910614526e-06,
-      "loss": 1.1375,
       "step": 560
     },
     {
       "epoch": 2.8610763454317896,
-      "grad_norm": 3.8811707496643066,
-      "learning_rate": 7.136871508379889e-06,
-      "loss": 1.1109,
       "step": 570
     },
     {
       "epoch": 2.911138923654568,
-      "grad_norm": 2.818143129348755,
-      "learning_rate": 7.067039106145251e-06,
-      "loss": 1.1036,
       "step": 580
     },
     {
       "epoch": 2.9612015018773468,
-      "grad_norm": 3.4761757850646973,
-      "learning_rate": 6.9972067039106154e-06,
-      "loss": 1.1239,
       "step": 590
     },
     {
       "epoch": 2.9962453066332917,
-      "eval_accuracy": 0.7880258899676376,
-      "eval_loss": 1.069029688835144,
-      "eval_runtime": 4.802,
-      "eval_samples_per_second": 1415.653,
-      "eval_steps_per_second": 44.356,
       "step": 597
     },
     {
       "epoch": 3.0150187734668337,
-      "grad_norm": 4.441803455352783,
-      "learning_rate": 6.927374301675979e-06,
-      "loss": 1.1924,
       "step": 600
     },
     {
       "epoch": 3.065081351689612,
-      "grad_norm": 6.063540458679199,
-      "learning_rate": 6.857541899441341e-06,
-      "loss": 1.0628,
       "step": 610
     },
     {
       "epoch": 3.1151439299123904,
-      "grad_norm": 4.163120746612549,
-      "learning_rate": 6.787709497206705e-06,
-      "loss": 1.0444,
       "step": 620
     },
     {
       "epoch": 3.165206508135169,
-      "grad_norm": 6.099351406097412,
-      "learning_rate": 6.7178770949720675e-06,
-      "loss": 1.1028,
       "step": 630
     },
     {
       "epoch": 3.2152690863579476,
-      "grad_norm": 4.259471416473389,
-      "learning_rate": 6.648044692737431e-06,
-      "loss": 1.0317,
       "step": 640
     },
     {
       "epoch": 3.2653316645807258,
-      "grad_norm": 4.617487907409668,
-      "learning_rate": 6.578212290502793e-06,
-      "loss": 1.0323,
       "step": 650
     },
     {
       "epoch": 3.3153942428035044,
-      "grad_norm": 4.48543119430542,
-      "learning_rate": 6.508379888268157e-06,
-      "loss": 1.0185,
       "step": 660
     },
     {
       "epoch": 3.365456821026283,
-      "grad_norm": 4.388581275939941,
-      "learning_rate": 6.43854748603352e-06,
-      "loss": 0.9979,
       "step": 670
     },
     {
       "epoch": 3.415519399249061,
-      "grad_norm": 7.762565612792969,
-      "learning_rate": 6.368715083798883e-06,
-      "loss": 0.9999,
       "step": 680
     },
     {
       "epoch": 3.4655819774718397,
-      "grad_norm": 3.058173179626465,
-      "learning_rate": 6.298882681564247e-06,
-      "loss": 0.9729,
       "step": 690
     },
     {
       "epoch": 3.5156445556946183,
-      "grad_norm": 6.832275390625,
-      "learning_rate": 6.229050279329609e-06,
-      "loss": 1.0058,
       "step": 700
     },
     {
       "epoch": 3.565707133917397,
-      "grad_norm": 3.4098267555236816,
-      "learning_rate": 6.1592178770949725e-06,
-      "loss": 0.9634,
       "step": 710
     },
     {
       "epoch": 3.615769712140175,
-      "grad_norm": 5.360341548919678,
-      "learning_rate": 6.089385474860336e-06,
-      "loss": 0.9622,
       "step": 720
     },
     {
       "epoch": 3.6658322903629537,
-      "grad_norm": 3.9530041217803955,
-      "learning_rate": 6.019553072625699e-06,
-      "loss": 0.9585,
       "step": 730
     },
     {
       "epoch": 3.7158948685857323,
-      "grad_norm": 8.933693885803223,
-      "learning_rate": 5.949720670391061e-06,
-      "loss": 0.953,
       "step": 740
     },
     {
       "epoch": 3.7659574468085104,
-      "grad_norm": 3.6110997200012207,
-      "learning_rate": 5.879888268156425e-06,
-      "loss": 0.9176,
       "step": 750
     },
     {
       "epoch": 3.816020025031289,
-      "grad_norm": 3.877078056335449,
-      "learning_rate": 5.810055865921789e-06,
-      "loss": 0.9133,
       "step": 760
     },
     {
       "epoch": 3.8660826032540676,
-      "grad_norm": 6.531003952026367,
-      "learning_rate": 5.740223463687151e-06,
-      "loss": 0.9288,
       "step": 770
     },
     {
       "epoch": 3.916145181476846,
-      "grad_norm": 3.672001600265503,
-      "learning_rate": 5.670391061452515e-06,
-      "loss": 0.8899,
       "step": 780
     },
     {
       "epoch": 3.966207759699625,
-      "grad_norm": 3.4693164825439453,
-      "learning_rate": 5.6005586592178775e-06,
-      "loss": 0.9107,
       "step": 790
     },
     {
       "epoch": 3.9962453066332917,
-      "eval_accuracy": 0.8961459252721389,
-      "eval_loss": 0.7700026035308838,
-      "eval_runtime": 4.7597,
-      "eval_samples_per_second": 1428.236,
-      "eval_steps_per_second": 44.751,
       "step": 796
     },
     {
       "epoch": 4.020025031289111,
-      "grad_norm": 5.503833293914795,
-      "learning_rate": 5.530726256983241e-06,
-      "loss": 0.9085,
       "step": 800
     },
     {
       "epoch": 4.07008760951189,
-      "grad_norm": 5.365431785583496,
-      "learning_rate": 5.460893854748603e-06,
-      "loss": 0.908,
       "step": 810
     },
     {
       "epoch": 4.1201501877346685,
-      "grad_norm": 4.582799434661865,
-      "learning_rate": 5.391061452513967e-06,
-      "loss": 0.8559,
       "step": 820
     },
     {
       "epoch": 4.170212765957447,
-      "grad_norm": 3.201195478439331,
-      "learning_rate": 5.3212290502793296e-06,
-      "loss": 0.8901,
       "step": 830
     },
     {
       "epoch": 4.220275344180226,
-      "grad_norm": 6.477810382843018,
-      "learning_rate": 5.251396648044693e-06,
-      "loss": 0.88,
       "step": 840
     },
     {
       "epoch": 4.270337922403003,
-      "grad_norm": 3.525606870651245,
-      "learning_rate": 5.181564245810057e-06,
-      "loss": 0.8099,
       "step": 850
     },
     {
       "epoch": 4.320400500625782,
-      "grad_norm": 3.92795729637146,
-      "learning_rate": 5.111731843575419e-06,
-      "loss": 0.8326,
       "step": 860
     },
     {
       "epoch": 4.370463078848561,
-      "grad_norm": 4.019381523132324,
-      "learning_rate": 5.041899441340783e-06,
-      "loss": 0.7902,
       "step": 870
     },
     {
       "epoch": 4.420525657071339,
-      "grad_norm": 3.7563273906707764,
-      "learning_rate": 4.972067039106146e-06,
-      "loss": 0.8322,
       "step": 880
     },
     {
       "epoch": 4.470588235294118,
-      "grad_norm": 4.296588897705078,
-      "learning_rate": 4.902234636871509e-06,
-      "loss": 0.7835,
       "step": 890
     },
     {
       "epoch": 4.520650813516896,
-      "grad_norm": 4.3726277351379395,
-      "learning_rate": 4.832402234636872e-06,
-      "loss": 0.7505,
       "step": 900
     },
     {
       "epoch": 4.570713391739675,
-      "grad_norm": 3.2456297874450684,
-      "learning_rate": 4.762569832402235e-06,
-      "loss": 0.7588,
       "step": 910
     },
     {
       "epoch": 4.6207759699624535,
-      "grad_norm": 4.264461994171143,
-      "learning_rate": 4.692737430167599e-06,
-      "loss": 0.7566,
       "step": 920
     },
     {
       "epoch": 4.670838548185231,
-      "grad_norm": 3.387613296508789,
-      "learning_rate": 4.622905027932961e-06,
-      "loss": 0.7585,
       "step": 930
     },
     {
       "epoch": 4.72090112640801,
-      "grad_norm": 4.655552864074707,
-      "learning_rate": 4.553072625698324e-06,
-      "loss": 0.7171,
       "step": 940
     },
     {
       "epoch": 4.7709637046307884,
-      "grad_norm": 4.884917259216309,
-      "learning_rate": 4.4832402234636875e-06,
-      "loss": 0.789,
       "step": 950
     },
     {
       "epoch": 4.821026282853567,
-      "grad_norm": 5.926691055297852,
-      "learning_rate": 4.413407821229051e-06,
-      "loss": 0.7626,
       "step": 960
     },
     {
       "epoch": 4.871088861076346,
-      "grad_norm": 5.080448150634766,
-      "learning_rate": 4.343575418994414e-06,
-      "loss": 0.7362,
       "step": 970
     },
     {
       "epoch": 4.921151439299124,
-      "grad_norm": 3.3272018432617188,
-      "learning_rate": 4.273743016759777e-06,
-      "loss": 0.753,
       "step": 980
     },
     {
       "epoch": 4.971214017521902,
-      "grad_norm": 4.017044544219971,
-      "learning_rate": 4.20391061452514e-06,
-      "loss": 0.7231,
       "step": 990
     },
     {
       "epoch": 4.996245306633291,
-      "eval_accuracy": 0.9658723153868785,
-      "eval_loss": 0.6167460680007935,
-      "eval_runtime": 4.7239,
-      "eval_samples_per_second": 1439.062,
-      "eval_steps_per_second": 45.09,
       "step": 995
     },
     {
       "epoch": 5.025031289111389,
-      "grad_norm": 3.775956392288208,
-      "learning_rate": 4.134078212290504e-06,
-      "loss": 0.7468,
       "step": 1000
     },
     {
       "epoch": 5.075093867334168,
-      "grad_norm": 3.734192371368408,
-      "learning_rate": 4.064245810055866e-06,
-      "loss": 0.7145,
       "step": 1010
     },
     {
       "epoch": 5.1251564455569465,
-      "grad_norm": 5.420441150665283,
-      "learning_rate": 3.994413407821229e-06,
-      "loss": 0.6958,
       "step": 1020
     },
     {
       "epoch": 5.175219023779725,
-      "grad_norm": 4.203883171081543,
-      "learning_rate": 3.9245810055865924e-06,
-      "loss": 0.714,
       "step": 1030
     },
     {
       "epoch": 5.225281602002503,
-      "grad_norm": 4.452067852020264,
-      "learning_rate": 3.854748603351956e-06,
-      "loss": 0.6791,
       "step": 1040
     },
     {
       "epoch": 5.275344180225281,
-      "grad_norm": 3.5976009368896484,
-      "learning_rate": 3.7849162011173185e-06,
-      "loss": 0.656,
       "step": 1050
     },
     {
       "epoch": 5.32540675844806,
-      "grad_norm": 3.9160947799682617,
-      "learning_rate": 3.715083798882682e-06,
-      "loss": 0.6789,
       "step": 1060
     },
     {
       "epoch": 5.375469336670839,
-      "grad_norm": 3.4226489067077637,
-      "learning_rate": 3.6452513966480454e-06,
-      "loss": 0.6889,
       "step": 1070
     },
     {
       "epoch": 5.425531914893617,
-      "grad_norm": 5.1838788986206055,
-      "learning_rate": 3.575418994413408e-06,
-      "loss": 0.6529,
       "step": 1080
     },
     {
       "epoch": 5.475594493116396,
-      "grad_norm": 3.7870497703552246,
-      "learning_rate": 3.5055865921787714e-06,
-      "loss": 0.6201,
       "step": 1090
     },
     {
       "epoch": 5.5256570713391735,
-      "grad_norm": 5.270695686340332,
-      "learning_rate": 3.435754189944134e-06,
-      "loss": 0.6675,
       "step": 1100
     },
     {
       "epoch": 5.575719649561952,
-      "grad_norm": 3.8207719326019287,
-      "learning_rate": 3.3659217877094974e-06,
-      "loss": 0.6715,
       "step": 1110
     },
     {
       "epoch": 5.625782227784731,
-      "grad_norm": 3.1934285163879395,
-      "learning_rate": 3.2960893854748607e-06,
-      "loss": 0.6449,
       "step": 1120
     },
     {
       "epoch": 5.675844806007509,
-      "grad_norm": 4.760712623596191,
-      "learning_rate": 3.2262569832402235e-06,
-      "loss": 0.6331,
       "step": 1130
     },
     {
       "epoch": 5.725907384230288,
-      "grad_norm": 3.9951508045196533,
-      "learning_rate": 3.1564245810055867e-06,
-      "loss": 0.6289,
       "step": 1140
     },
     {
       "epoch": 5.7759699624530665,
-      "grad_norm": 5.959083557128906,
-      "learning_rate": 3.0865921787709503e-06,
-      "loss": 0.6101,
       "step": 1150
     },
     {
       "epoch": 5.826032540675845,
-      "grad_norm": 5.146860599517822,
-      "learning_rate": 3.016759776536313e-06,
-      "loss": 0.6235,
       "step": 1160
     },
     {
       "epoch": 5.876095118898624,
-      "grad_norm": 4.0103230476379395,
-      "learning_rate": 2.9469273743016764e-06,
-      "loss": 0.6472,
       "step": 1170
     },
     {
       "epoch": 5.926157697121401,
-      "grad_norm": 4.349458694458008,
-      "learning_rate": 2.877094972067039e-06,
-      "loss": 0.5924,
       "step": 1180
     },
     {
       "epoch": 5.97622027534418,
-      "grad_norm": 4.416366100311279,
-      "learning_rate": 2.8072625698324024e-06,
-      "loss": 0.5972,
       "step": 1190
     },
     {
       "epoch": 5.996245306633291,
-      "eval_accuracy": 0.9735216240070609,
-      "eval_loss": 0.48377424478530884,
-      "eval_runtime": 4.9277,
-      "eval_samples_per_second": 1379.544,
-      "eval_steps_per_second": 43.225,
       "step": 1194
     },
     {
       "epoch": 6.030037546933667,
-      "grad_norm": 5.7863569259643555,
-      "learning_rate": 2.7374301675977656e-06,
-      "loss": 0.6391,
       "step": 1200
     },
     {
       "epoch": 6.080100125156446,
-      "grad_norm": 3.5653481483459473,
-      "learning_rate": 2.6675977653631285e-06,
-      "loss": 0.577,
       "step": 1210
     },
     {
       "epoch": 6.130162703379224,
-      "grad_norm": 2.4277288913726807,
-      "learning_rate": 2.5977653631284917e-06,
-      "loss": 0.5942,
       "step": 1220
     },
     {
       "epoch": 6.180225281602002,
-      "grad_norm": 4.4123406410217285,
-      "learning_rate": 2.5279329608938553e-06,
-      "loss": 0.5952,
       "step": 1230
     },
     {
       "epoch": 6.230287859824781,
-      "grad_norm": 3.4356672763824463,
-      "learning_rate": 2.458100558659218e-06,
-      "loss": 0.5597,
       "step": 1240
     },
     {
       "epoch": 6.280350438047559,
-      "grad_norm": 3.3665220737457275,
-      "learning_rate": 2.3882681564245814e-06,
-      "loss": 0.586,
       "step": 1250
     },
     {
       "epoch": 6.330413016270338,
-      "grad_norm": 4.899147987365723,
-      "learning_rate": 2.318435754189944e-06,
-      "loss": 0.5691,
       "step": 1260
     },
     {
       "epoch": 6.380475594493117,
-      "grad_norm": 3.53438663482666,
-      "learning_rate": 2.2486033519553074e-06,
-      "loss": 0.5598,
       "step": 1270
     },
     {
       "epoch": 6.430538172715895,
-      "grad_norm": 2.7354676723480225,
-      "learning_rate": 2.1787709497206706e-06,
-      "loss": 0.5858,
       "step": 1280
     },
     {
       "epoch": 6.480600750938673,
-      "grad_norm": 4.669964790344238,
-      "learning_rate": 2.108938547486034e-06,
-      "loss": 0.5727,
       "step": 1290
     },
     {
       "epoch": 6.5306633291614515,
-      "grad_norm": 4.332780361175537,
-      "learning_rate": 2.039106145251397e-06,
-      "loss": 0.5416,
       "step": 1300
     },
     {
       "epoch": 6.58072590738423,
-      "grad_norm": 5.418635845184326,
-      "learning_rate": 1.96927374301676e-06,
-      "loss": 0.5533,
       "step": 1310
     },
     {
       "epoch": 6.630788485607009,
-      "grad_norm": 2.963872194290161,
-      "learning_rate": 1.899441340782123e-06,
-      "loss": 0.5441,
       "step": 1320
     },
     {
       "epoch": 6.680851063829787,
-      "grad_norm": 3.982882261276245,
-      "learning_rate": 1.8296089385474861e-06,
-      "loss": 0.5522,
       "step": 1330
     },
     {
       "epoch": 6.730913642052566,
-      "grad_norm": 3.4341752529144287,
-      "learning_rate": 1.7597765363128494e-06,
-      "loss": 0.5156,
       "step": 1340
     },
     {
       "epoch": 6.7809762202753445,
-      "grad_norm": 3.866319179534912,
-      "learning_rate": 1.6899441340782124e-06,
-      "loss": 0.543,
       "step": 1350
     },
     {
       "epoch": 6.831038798498122,
-      "grad_norm": 4.521007537841797,
-      "learning_rate": 1.6201117318435754e-06,
-      "loss": 0.5466,
       "step": 1360
     },
     {
       "epoch": 6.881101376720901,
-      "grad_norm": 4.014106273651123,
-      "learning_rate": 1.5502793296089386e-06,
-      "loss": 0.5131,
       "step": 1370
     },
     {
       "epoch": 6.931163954943679,
-      "grad_norm": 4.237702369689941,
-      "learning_rate": 1.4804469273743019e-06,
-      "loss": 0.5626,
       "step": 1380
     },
     {
       "epoch": 6.981226533166458,
-      "grad_norm": 3.3088533878326416,
-      "learning_rate": 1.4106145251396649e-06,
-      "loss": 0.5143,
       "step": 1390
     },
     {
       "epoch": 6.996245306633291,
-      "eval_accuracy": 0.9761694616063548,
-      "eval_loss": 0.4227001368999481,
-      "eval_runtime": 4.8947,
-      "eval_samples_per_second": 1388.847,
-      "eval_steps_per_second": 43.516,
       "step": 1393
     },
     {
       "epoch": 7.035043804755945,
-      "grad_norm": 3.7986509799957275,
-      "learning_rate": 1.3407821229050281e-06,
-      "loss": 0.574,
       "step": 1400
     },
     {
       "epoch": 7.085106382978723,
-      "grad_norm": 4.211392879486084,
-      "learning_rate": 1.2709497206703911e-06,
-      "loss": 0.5194,
       "step": 1410
     },
     {
       "epoch": 7.135168961201502,
-      "grad_norm": 3.6076323986053467,
-      "learning_rate": 1.2011173184357544e-06,
-      "loss": 0.5085,
       "step": 1420
     },
     {
       "epoch": 7.18523153942428,
-      "grad_norm": 5.054622650146484,
-      "learning_rate": 1.1312849162011174e-06,
-      "loss": 0.5059,
       "step": 1430
     },
     {
       "epoch": 7.235294117647059,
-      "grad_norm": 5.9564313888549805,
-      "learning_rate": 1.0614525139664806e-06,
-      "loss": 0.509,
       "step": 1440
     },
     {
       "epoch": 7.2853566958698375,
-      "grad_norm": 5.2771220207214355,
-      "learning_rate": 9.916201117318436e-07,
-      "loss": 0.5613,
       "step": 1450
     },
     {
       "epoch": 7.335419274092616,
-      "grad_norm": 3.4556643962860107,
-      "learning_rate": 9.217877094972068e-07,
-      "loss": 0.5164,
       "step": 1460
     },
     {
       "epoch": 7.385481852315394,
-      "grad_norm": 4.0196003913879395,
-      "learning_rate": 8.519553072625699e-07,
-      "loss": 0.5255,
       "step": 1470
     },
     {
       "epoch": 7.435544430538172,
-      "grad_norm": 2.3283958435058594,
-      "learning_rate": 7.82122905027933e-07,
-      "loss": 0.5133,
       "step": 1480
     },
     {
       "epoch": 7.485607008760951,
-      "grad_norm": 3.3878517150878906,
-      "learning_rate": 7.122905027932961e-07,
-      "loss": 0.5028,
       "step": 1490
     },
     {
       "epoch": 7.53566958698373,
-      "grad_norm": 4.05161190032959,
-      "learning_rate": 6.424581005586592e-07,
-      "loss": 0.5191,
       "step": 1500
     },
     {
       "epoch": 7.585732165206508,
-      "grad_norm": 3.4434776306152344,
-      "learning_rate": 5.726256983240224e-07,
-      "loss": 0.5347,
       "step": 1510
     },
     {
       "epoch": 7.635794743429287,
-      "grad_norm": 3.151704788208008,
-      "learning_rate": 5.027932960893855e-07,
-      "loss": 0.5233,
       "step": 1520
     },
     {
       "epoch": 7.685857321652065,
-      "grad_norm": 4.1916046142578125,
-      "learning_rate": 4.3296089385474867e-07,
-      "loss": 0.5469,
       "step": 1530
     },
     {
       "epoch": 7.735919899874844,
-      "grad_norm": 5.041410446166992,
-      "learning_rate": 3.631284916201118e-07,
-      "loss": 0.5272,
       "step": 1540
     },
     {
       "epoch": 7.785982478097622,
-      "grad_norm": 4.148556709289551,
-      "learning_rate": 2.932960893854749e-07,
-      "loss": 0.5222,
       "step": 1550
     },
     {
       "epoch": 7.8360450563204,
-      "grad_norm": 2.708613872528076,
-      "learning_rate": 2.23463687150838e-07,
-      "loss": 0.5405,
       "step": 1560
     },
     {
       "epoch": 7.886107634543179,
-      "grad_norm": 3.7867846488952637,
-      "learning_rate": 1.5363128491620113e-07,
-      "loss": 0.5249,
       "step": 1570
     },
     {
       "epoch": 7.9361702127659575,
-      "grad_norm": 3.1069061756134033,
-      "learning_rate": 8.379888268156426e-08,
-      "loss": 0.52,
       "step": 1580
     },
     {
       "epoch": 7.986232790988736,
-      "grad_norm": 4.343489646911621,
-      "learning_rate": 1.3966480446927376e-08,
-      "loss": 0.5159,
       "step": 1590
     },
     {
       "epoch": 7.996245306633291,
-      "eval_accuracy": 0.9763165636952045,
-      "eval_loss": 0.40624934434890747,
-      "eval_runtime": 5.5236,
-      "eval_samples_per_second": 1230.722,
-      "eval_steps_per_second": 38.562,
       "step": 1592
     },
     {
       "epoch": 7.996245306633291,
       "step": 1592,
       "total_flos": 3.777723239743488e+18,
-      "train_loss": 1.152813568037359,
-      "train_runtime": 637.2674,
-      "train_samples_per_second": 641.414,
-      "train_steps_per_second": 2.498
     }
   ],
   "logging_steps": 10,

 {
+  "best_metric": 0.9773462783171522,
   "best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1592",
   "epoch": 7.996245306633291,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 0.05006257822277847,
+      "grad_norm": 2.5059146881103516,
+      "learning_rate": 3.125e-06,
+      "loss": 4.1393,
       "step": 10
     },
     {
       "epoch": 0.10012515644555695,
+      "grad_norm": 3.3145341873168945,
+      "learning_rate": 6.25e-06,
+      "loss": 4.1209,
       "step": 20
     },
     {
       "epoch": 0.15018773466833543,
+      "grad_norm": 3.9769318103790283,
+      "learning_rate": 9.375000000000001e-06,
+      "loss": 3.8903,
       "step": 30
     },
     {
       "epoch": 0.2002503128911139,
+      "grad_norm": 5.446993350982666,
+      "learning_rate": 1.25e-05,
+      "loss": 3.4194,
       "step": 40
     },
     {
       "epoch": 0.2503128911138924,
+      "grad_norm": 5.676177978515625,
+      "learning_rate": 1.5625e-05,
+      "loss": 2.7891,
       "step": 50
     },
     {
       "epoch": 0.30037546933667086,
+      "grad_norm": 5.550710678100586,
+      "learning_rate": 1.8750000000000002e-05,
+      "loss": 2.3951,
       "step": 60
     },
     {
       "epoch": 0.3504380475594493,
+      "grad_norm": 4.72838020324707,
+      "learning_rate": 2.1875e-05,
+      "loss": 2.1523,
       "step": 70
     },
     {
       "epoch": 0.4005006257822278,
+      "grad_norm": 3.6451964378356934,
+      "learning_rate": 2.5e-05,
+      "loss": 1.9811,
       "step": 80
     },
     {
       "epoch": 0.45056320400500627,
+      "grad_norm": 2.2974889278411865,
+      "learning_rate": 2.8125000000000003e-05,
+      "loss": 1.8507,
       "step": 90
     },
     {
       "epoch": 0.5006257822277848,
+      "grad_norm": 0.8505929708480835,
+      "learning_rate": 3.125e-05,
+      "loss": 1.7874,
       "step": 100
     },
     {
       "epoch": 0.5506883604505632,
+      "grad_norm": 0.5304011702537537,
+      "learning_rate": 3.4375e-05,
+      "loss": 1.7174,
       "step": 110
     },
     {
       "epoch": 0.6007509386733417,
+      "grad_norm": 1.0286344289779663,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 1.7567,
       "step": 120
     },
     {
       "epoch": 0.6508135168961201,
+      "grad_norm": 0.9384429454803467,
+      "learning_rate": 4.0625000000000005e-05,
+      "loss": 1.7851,
       "step": 130
     },
     {
       "epoch": 0.7008760951188986,
+      "grad_norm": 2.4175920486450195,
+      "learning_rate": 4.375e-05,
+      "loss": 1.6505,
       "step": 140
     },
     {
       "epoch": 0.7509386733416771,
+      "grad_norm": 1.0501582622528076,
+      "learning_rate": 4.6875e-05,
+      "loss": 1.672,
       "step": 150
     },
     {
       "epoch": 0.8010012515644556,
+      "grad_norm": 1.9757583141326904,
+      "learning_rate": 5e-05,
+      "loss": 1.6418,
       "step": 160
     },
     {
       "epoch": 0.851063829787234,
+      "grad_norm": 2.322604179382324,
+      "learning_rate": 4.965083798882682e-05,
+      "loss": 1.5421,
       "step": 170
     },
     {
       "epoch": 0.9011264080100125,
+      "grad_norm": 1.9013667106628418,
+      "learning_rate": 4.9301675977653635e-05,
+      "loss": 1.5342,
       "step": 180
     },
     {
       "epoch": 0.951188986232791,
+      "grad_norm": 2.9960286617279053,
+      "learning_rate": 4.8952513966480454e-05,
+      "loss": 1.4829,
       "step": 190
     },
     {
       "epoch": 0.9962453066332916,
+      "eval_accuracy": 0.6456310679611651,
+      "eval_loss": 1.292258620262146,
+      "eval_runtime": 4.7953,
+      "eval_samples_per_second": 1417.633,
+      "eval_steps_per_second": 44.418,
       "step": 199
     },
     {
       "epoch": 1.0050062578222778,
+      "grad_norm": 2.5933773517608643,
+      "learning_rate": 4.860335195530727e-05,
+      "loss": 1.4948,
       "step": 200
     },
     {
       "epoch": 1.0550688360450564,
+      "grad_norm": 2.091658115386963,
+      "learning_rate": 4.825418994413408e-05,
+      "loss": 1.3361,
       "step": 210
     },
     {
       "epoch": 1.1051314142678348,
+      "grad_norm": 7.571577548980713,
+      "learning_rate": 4.790502793296089e-05,
+      "loss": 1.4043,
       "step": 220
     },
     {
       "epoch": 1.1551939924906134,
+      "grad_norm": 1.8106225728988647,
+      "learning_rate": 4.755586592178771e-05,
+      "loss": 1.2421,
       "step": 230
     },
     {
       "epoch": 1.2052565707133918,
+      "grad_norm": 1.4783036708831787,
+      "learning_rate": 4.7206703910614525e-05,
+      "loss": 1.2293,
       "step": 240
     },
     {
       "epoch": 1.2553191489361701,
+      "grad_norm": 3.992377996444702,
+      "learning_rate": 4.6857541899441344e-05,
+      "loss": 1.1909,
       "step": 250
     },
     {
       "epoch": 1.3053817271589487,
+      "grad_norm": 3.2013051509857178,
+      "learning_rate": 4.6508379888268164e-05,
+      "loss": 1.182,
       "step": 260
     },
     {
       "epoch": 1.355444305381727,
+      "grad_norm": 2.9630327224731445,
+      "learning_rate": 4.615921787709498e-05,
+      "loss": 1.1559,
       "step": 270
     },
     {
       "epoch": 1.4055068836045057,
+      "grad_norm": 3.330141544342041,
+      "learning_rate": 4.581005586592179e-05,
+      "loss": 1.1693,
       "step": 280
     },
     {
       "epoch": 1.455569461827284,
+      "grad_norm": 2.7530879974365234,
+      "learning_rate": 4.54608938547486e-05,
+      "loss": 1.1137,
       "step": 290
     },
     {
       "epoch": 1.5056320400500627,
+      "grad_norm": 1.702641248703003,
+      "learning_rate": 4.511173184357542e-05,
+      "loss": 1.1002,
       "step": 300
     },
     {
       "epoch": 1.555694618272841,
+      "grad_norm": 2.645132303237915,
+      "learning_rate": 4.4762569832402234e-05,
+      "loss": 1.1707,
       "step": 310
     },
     {
       "epoch": 1.6057571964956194,
+      "grad_norm": 2.5694878101348877,
+      "learning_rate": 4.4413407821229054e-05,
+      "loss": 1.0454,
       "step": 320
     },
     {
       "epoch": 1.655819774718398,
+      "grad_norm": 3.6792080402374268,
+      "learning_rate": 4.406424581005587e-05,
+      "loss": 1.0544,
       "step": 330
     },
     {
       "epoch": 1.7058823529411766,
+      "grad_norm": 6.041355609893799,
+      "learning_rate": 4.3715083798882686e-05,
+      "loss": 1.0394,
       "step": 340
     },
     {
       "epoch": 1.7559449311639548,
+      "grad_norm": 3.808980941772461,
+      "learning_rate": 4.33659217877095e-05,
+      "loss": 0.9862,
       "step": 350
     },
     {
       "epoch": 1.8060075093867334,
+      "grad_norm": 2.6217703819274902,
+      "learning_rate": 4.301675977653631e-05,
+      "loss": 0.9754,
       "step": 360
     },
     {
       "epoch": 1.856070087609512,
+      "grad_norm": 3.5803720951080322,
+      "learning_rate": 4.266759776536313e-05,
+      "loss": 0.932,
       "step": 370
     },
     {
       "epoch": 1.9061326658322904,
+      "grad_norm": 2.712857961654663,
+      "learning_rate": 4.2318435754189944e-05,
+      "loss": 0.8815,
       "step": 380
     },
     {
       "epoch": 1.9561952440550687,
+      "grad_norm": 4.25401496887207,
+      "learning_rate": 4.1969273743016764e-05,
+      "loss": 0.8844,
       "step": 390
     },
     {
       "epoch": 1.9962453066332917,
+      "eval_accuracy": 0.7917034421888791,
+      "eval_loss": 0.6811065673828125,
+      "eval_runtime": 4.7675,
+      "eval_samples_per_second": 1425.901,
+      "eval_steps_per_second": 44.677,
       "step": 398
     },
     {
       "epoch": 2.0100125156445556,
+      "grad_norm": 3.6374008655548096,
+      "learning_rate": 4.1620111731843576e-05,
+      "loss": 0.8524,
       "step": 400
     },
     {
       "epoch": 2.0600750938673342,
+      "grad_norm": 5.184952259063721,
+      "learning_rate": 4.1270949720670396e-05,
+      "loss": 0.8132,
       "step": 410
     },
     {
       "epoch": 2.110137672090113,
+      "grad_norm": 4.079555988311768,
+      "learning_rate": 4.092178770949721e-05,
+      "loss": 0.7761,
       "step": 420
     },
     {
       "epoch": 2.160200250312891,
+      "grad_norm": 2.930490016937256,
+      "learning_rate": 4.057262569832402e-05,
+      "loss": 0.7665,
       "step": 430
     },
     {
       "epoch": 2.2102628285356696,
+      "grad_norm": 3.523425579071045,
+      "learning_rate": 4.022346368715084e-05,
+      "loss": 0.6984,
       "step": 440
     },
     {
       "epoch": 2.260325406758448,
+      "grad_norm": 2.8467819690704346,
+      "learning_rate": 3.9874301675977654e-05,
+      "loss": 0.6988,
       "step": 450
     },
     {
       "epoch": 2.3103879849812268,
+      "grad_norm": 3.579084634780884,
+      "learning_rate": 3.952513966480447e-05,
+      "loss": 0.6424,
       "step": 460
     },
     {
       "epoch": 2.360450563204005,
+      "grad_norm": 4.213118076324463,
+      "learning_rate": 3.9175977653631286e-05,
+      "loss": 0.607,
       "step": 470
     },
     {
       "epoch": 2.4105131414267835,
+      "grad_norm": 2.531745672225952,
+      "learning_rate": 3.8826815642458106e-05,
+      "loss": 0.608,
       "step": 480
     },
     {
       "epoch": 2.460575719649562,
+      "grad_norm": 2.7486679553985596,
+      "learning_rate": 3.847765363128492e-05,
+      "loss": 0.5296,
       "step": 490
     },
     {
       "epoch": 2.5106382978723403,
+      "grad_norm": 4.085735321044922,
+      "learning_rate": 3.812849162011173e-05,
+      "loss": 0.5622,
       "step": 500
     },
     {
       "epoch": 2.560700876095119,
+      "grad_norm": 4.96565580368042,
+      "learning_rate": 3.777932960893855e-05,
+      "loss": 0.524,
       "step": 510
     },
     {
       "epoch": 2.6107634543178975,
+      "grad_norm": 4.465268611907959,
+      "learning_rate": 3.743016759776536e-05,
+      "loss": 0.4994,
       "step": 520
     },
     {
       "epoch": 2.660826032540676,
+      "grad_norm": 3.813931465148926,
+      "learning_rate": 3.708100558659218e-05,
+      "loss": 0.5042,
       "step": 530
     },
     {
       "epoch": 2.710888610763454,
+      "grad_norm": 2.1845741271972656,
+      "learning_rate": 3.6731843575418996e-05,
+      "loss": 0.4694,
       "step": 540
     },
     {
       "epoch": 2.760951188986233,
+      "grad_norm": 5.71811580657959,
+      "learning_rate": 3.6382681564245815e-05,
+      "loss": 0.4558,
       "step": 550
     },
     {
       "epoch": 2.8110137672090114,
+      "grad_norm": 4.474643230438232,
+      "learning_rate": 3.603351955307263e-05,
+      "loss": 0.4703,
       "step": 560
     },
     {
       "epoch": 2.8610763454317896,
+      "grad_norm": 3.506659746170044,
+      "learning_rate": 3.568435754189944e-05,
+      "loss": 0.4196,
       "step": 570
     },
     {
       "epoch": 2.911138923654568,
+      "grad_norm": 3.286609172821045,
+      "learning_rate": 3.5335195530726253e-05,
+      "loss": 0.433,
       "step": 580
     },
     {
       "epoch": 2.9612015018773468,
+      "grad_norm": 3.182314872741699,
+      "learning_rate": 3.498603351955307e-05,
+      "loss": 0.4241,
       "step": 590
     },
     {
       "epoch": 2.9962453066332917,
+      "eval_accuracy": 0.9267431597528685,
+      "eval_loss": 0.2847038805484772,
+      "eval_runtime": 4.9241,
+      "eval_samples_per_second": 1380.567,
+      "eval_steps_per_second": 43.257,
       "step": 597
     },
     {
       "epoch": 3.0150187734668337,
+      "grad_norm": 5.002447128295898,
+      "learning_rate": 3.463687150837989e-05,
+      "loss": 0.4553,
       "step": 600
     },
     {
       "epoch": 3.065081351689612,
+      "grad_norm": 4.032111644744873,
+      "learning_rate": 3.4287709497206705e-05,
+      "loss": 0.384,
       "step": 610
     },
     {
       "epoch": 3.1151439299123904,
+      "grad_norm": 1.9067034721374512,
+      "learning_rate": 3.3938547486033525e-05,
+      "loss": 0.3805,
       "step": 620
     },
     {
       "epoch": 3.165206508135169,
+      "grad_norm": 3.4726545810699463,
+      "learning_rate": 3.358938547486034e-05,
+      "loss": 0.4047,
       "step": 630
     },
     {
       "epoch": 3.2152690863579476,
+      "grad_norm": 2.600872039794922,
+      "learning_rate": 3.324022346368715e-05,
+      "loss": 0.3692,
       "step": 640
     },
     {
       "epoch": 3.2653316645807258,
+      "grad_norm": 2.484164237976074,
+      "learning_rate": 3.289106145251396e-05,
+      "loss": 0.3144,
       "step": 650
     },
     {
       "epoch": 3.3153942428035044,
+      "grad_norm": 2.312174081802368,
+      "learning_rate": 3.254189944134078e-05,
+      "loss": 0.3164,
       "step": 660
     },
     {
       "epoch": 3.365456821026283,
+      "grad_norm": 3.2117793560028076,
+      "learning_rate": 3.2192737430167595e-05,
+      "loss": 0.3156,
       "step": 670
     },
     {
       "epoch": 3.415519399249061,
+      "grad_norm": 3.4549670219421387,
+      "learning_rate": 3.1843575418994415e-05,
+      "loss": 0.3241,
       "step": 680
     },
     {
       "epoch": 3.4655819774718397,
+      "grad_norm": 2.3556506633758545,
+      "learning_rate": 3.1494413407821234e-05,
+      "loss": 0.3257,
       "step": 690
     },
     {
       "epoch": 3.5156445556946183,
+      "grad_norm": 2.6974313259124756,
+      "learning_rate": 3.114525139664805e-05,
+      "loss": 0.3244,
       "step": 700
     },
     {
       "epoch": 3.565707133917397,
+      "grad_norm": 1.642268419265747,
+      "learning_rate": 3.079608938547486e-05,
+      "loss": 0.2919,
       "step": 710
     },
     {
       "epoch": 3.615769712140175,
+      "grad_norm": 3.0940229892730713,
+      "learning_rate": 3.0446927374301676e-05,
+      "loss": 0.2842,
       "step": 720
     },
     {
       "epoch": 3.6658322903629537,
+      "grad_norm": 2.1800785064697266,
+      "learning_rate": 3.0097765363128492e-05,
+      "loss": 0.2938,
       "step": 730
     },
     {
       "epoch": 3.7158948685857323,
+      "grad_norm": 3.684602737426758,
+      "learning_rate": 2.9748603351955305e-05,
+      "loss": 0.2789,
       "step": 740
     },
     {
       "epoch": 3.7659574468085104,
+      "grad_norm": 4.337268829345703,
+      "learning_rate": 2.9399441340782125e-05,
+      "loss": 0.2898,
       "step": 750
     },
     {
       "epoch": 3.816020025031289,
+      "grad_norm": 2.6104233264923096,
+      "learning_rate": 2.9050279329608944e-05,
+      "loss": 0.2588,
       "step": 760
     },
     {
       "epoch": 3.8660826032540676,
+      "grad_norm": 3.2375924587249756,
+      "learning_rate": 2.8701117318435757e-05,
+      "loss": 0.2831,
       "step": 770
     },
     {
       "epoch": 3.916145181476846,
+      "grad_norm": 2.4927661418914795,
+      "learning_rate": 2.8351955307262573e-05,
+      "loss": 0.2541,
       "step": 780
     },
     {
       "epoch": 3.966207759699625,
+      "grad_norm": 3.670045852661133,
+      "learning_rate": 2.8002793296089386e-05,
+      "loss": 0.2724,
       "step": 790
     },
     {
       "epoch": 3.9962453066332917,
+      "eval_accuracy": 0.973080317740512,
+      "eval_loss": 0.15457233786582947,
+      "eval_runtime": 4.9561,
+      "eval_samples_per_second": 1371.635,
+      "eval_steps_per_second": 42.977,
       "step": 796
     },
     {
       "epoch": 4.020025031289111,
+      "grad_norm": 2.9852592945098877,
+      "learning_rate": 2.7653631284916205e-05,
+      "loss": 0.2705,
       "step": 800
     },
     {
       "epoch": 4.07008760951189,
+      "grad_norm": 2.433262825012207,
+      "learning_rate": 2.7304469273743018e-05,
+      "loss": 0.2544,
       "step": 810
     },
     {
       "epoch": 4.1201501877346685,
+      "grad_norm": 3.54846453666687,
+      "learning_rate": 2.6955307262569834e-05,
+      "loss": 0.2586,
       "step": 820
     },
     {
       "epoch": 4.170212765957447,
+      "grad_norm": 2.550708770751953,
+      "learning_rate": 2.6606145251396647e-05,
+      "loss": 0.2406,
       "step": 830
     },
     {
       "epoch": 4.220275344180226,
+      "grad_norm": 4.386864185333252,
+      "learning_rate": 2.6256983240223466e-05,
+      "loss": 0.2686,
       "step": 840
     },
     {
       "epoch": 4.270337922403003,
+      "grad_norm": 2.19413161277771,
+      "learning_rate": 2.5907821229050283e-05,
+      "loss": 0.2452,
       "step": 850
     },
     {
       "epoch": 4.320400500625782,
+      "grad_norm": 2.9501962661743164,
+      "learning_rate": 2.5558659217877095e-05,
+      "loss": 0.2245,
       "step": 860
     },
     {
       "epoch": 4.370463078848561,
+      "grad_norm": 3.0304136276245117,
+      "learning_rate": 2.5209497206703915e-05,
+      "loss": 0.2344,
       "step": 870
     },
     {
       "epoch": 4.420525657071339,
+      "grad_norm": 2.301668167114258,
+      "learning_rate": 2.4860335195530728e-05,
+      "loss": 0.2682,
       "step": 880
     },
     {
       "epoch": 4.470588235294118,
+      "grad_norm": 2.6299359798431396,
+      "learning_rate": 2.4511173184357544e-05,
+      "loss": 0.2461,
       "step": 890
     },
     {
       "epoch": 4.520650813516896,
+      "grad_norm": 1.7917237281799316,
+      "learning_rate": 2.416201117318436e-05,
+      "loss": 0.2071,
       "step": 900
     },
     {
       "epoch": 4.570713391739675,
+      "grad_norm": 1.8918017148971558,
+      "learning_rate": 2.3812849162011176e-05,
+      "loss": 0.2219,
       "step": 910
     },
     {
       "epoch": 4.6207759699624535,
+      "grad_norm": 2.058867931365967,
+      "learning_rate": 2.346368715083799e-05,
+      "loss": 0.2307,
       "step": 920
     },
     {
       "epoch": 4.670838548185231,
+      "grad_norm": 1.6558533906936646,
+      "learning_rate": 2.3114525139664805e-05,
+      "loss": 0.2127,
       "step": 930
     },
     {
       "epoch": 4.72090112640801,
+      "grad_norm": 1.8600987195968628,
+      "learning_rate": 2.276536312849162e-05,
+      "loss": 0.2245,
       "step": 940
     },
     {
       "epoch": 4.7709637046307884,
+      "grad_norm": 3.023834466934204,
+      "learning_rate": 2.2416201117318437e-05,
+      "loss": 0.254,
       "step": 950
     },
     {
       "epoch": 4.821026282853567,
+      "grad_norm": 3.464167833328247,
+      "learning_rate": 2.206703910614525e-05,
+      "loss": 0.2364,
       "step": 960
     },
     {
       "epoch": 4.871088861076346,
+      "grad_norm": 2.8578057289123535,
+      "learning_rate": 2.171787709497207e-05,
+      "loss": 0.2414,
       "step": 970
     },
     {
       "epoch": 4.921151439299124,
+      "grad_norm": 1.949834942817688,
+      "learning_rate": 2.1368715083798886e-05,
+      "loss": 0.2368,
       "step": 980
     },
     {
       "epoch": 4.971214017521902,
+      "grad_norm": 3.3509461879730225,
+      "learning_rate": 2.10195530726257e-05,
+      "loss": 0.2362,
       "step": 990
     },
     {
       "epoch": 4.996245306633291,
+      "eval_accuracy": 0.9760223595175052,
+      "eval_loss": 0.1284540593624115,
+      "eval_runtime": 4.9643,
+      "eval_samples_per_second": 1369.379,
+      "eval_steps_per_second": 42.906,
       "step": 995
     },
     {
       "epoch": 5.025031289111389,
+      "grad_norm": 2.664990186691284,
+      "learning_rate": 2.0670391061452515e-05,
+      "loss": 0.2298,
       "step": 1000
     },
     {
       "epoch": 5.075093867334168,
+      "grad_norm": 1.4740562438964844,
+      "learning_rate": 2.032122905027933e-05,
+      "loss": 0.2125,
       "step": 1010
     },
     {
       "epoch": 5.1251564455569465,
+      "grad_norm": 3.666245698928833,
+      "learning_rate": 1.9972067039106147e-05,
+      "loss": 0.186,
       "step": 1020
     },
     {
       "epoch": 5.175219023779725,
+      "grad_norm": 2.3000710010528564,
+      "learning_rate": 1.962290502793296e-05,
+      "loss": 0.2225,
       "step": 1030
     },
     {
       "epoch": 5.225281602002503,
+      "grad_norm": 2.916823625564575,
+      "learning_rate": 1.9273743016759776e-05,
+      "loss": 0.1999,
       "step": 1040
     },
     {
       "epoch": 5.275344180225281,
+      "grad_norm": 2.1694273948669434,
+      "learning_rate": 1.8924581005586592e-05,
+      "loss": 0.1737,
       "step": 1050
     },
     {
       "epoch": 5.32540675844806,
+      "grad_norm": 2.1091065406799316,
+      "learning_rate": 1.857541899441341e-05,
+      "loss": 0.1898,
       "step": 1060
     },
     {
       "epoch": 5.375469336670839,
+      "grad_norm": 2.4666635990142822,
+      "learning_rate": 1.8226256983240224e-05,
+      "loss": 0.2115,
       "step": 1070
     },
     {
       "epoch": 5.425531914893617,
+      "grad_norm": 2.0962629318237305,
+      "learning_rate": 1.787709497206704e-05,
+      "loss": 0.2189,
       "step": 1080
     },
     {
       "epoch": 5.475594493116396,
+      "grad_norm": 1.7196861505508423,
+      "learning_rate": 1.7527932960893857e-05,
+      "loss": 0.2013,
       "step": 1090
     },
     {
       "epoch": 5.5256570713391735,
+      "grad_norm": 2.7366960048675537,
+      "learning_rate": 1.7178770949720673e-05,
+      "loss": 0.2257,
       "step": 1100
     },
     {
       "epoch": 5.575719649561952,
+      "grad_norm": 2.12593412399292,
+      "learning_rate": 1.6829608938547485e-05,
+      "loss": 0.2162,
       "step": 1110
     },
     {
       "epoch": 5.625782227784731,
+      "grad_norm": 1.8080042600631714,
+      "learning_rate": 1.64804469273743e-05,
+      "loss": 0.2295,
       "step": 1120
     },
     {
       "epoch": 5.675844806007509,
+      "grad_norm": 2.192559242248535,
+      "learning_rate": 1.6131284916201118e-05,
+      "loss": 0.2077,
       "step": 1130
     },
     {
       "epoch": 5.725907384230288,
+      "grad_norm": 2.823213577270508,
+      "learning_rate": 1.5782122905027934e-05,
+      "loss": 0.2017,
       "step": 1140
     },
     {
       "epoch": 5.7759699624530665,
+      "grad_norm": 3.1835837364196777,
+      "learning_rate": 1.543296089385475e-05,
+      "loss": 0.1741,
       "step": 1150
     },
     {
       "epoch": 5.826032540675845,
+      "grad_norm": 3.540625810623169,
+      "learning_rate": 1.5083798882681566e-05,
+      "loss": 0.1902,
       "step": 1160
     },
     {
       "epoch": 5.876095118898624,
+      "grad_norm": 2.113506555557251,
+      "learning_rate": 1.473463687150838e-05,
+      "loss": 0.2406,
       "step": 1170
     },
     {
       "epoch": 5.926157697121401,
+      "grad_norm": 3.060197353363037,
+      "learning_rate": 1.4385474860335197e-05,
+      "loss": 0.1803,
       "step": 1180
     },
     {
       "epoch": 5.97622027534418,
+      "grad_norm": 1.7786002159118652,
+      "learning_rate": 1.4036312849162011e-05,
+      "loss": 0.1729,
       "step": 1190
     },
     {
       "epoch": 5.996245306633291,
+      "eval_accuracy": 0.9748455428067079,
+      "eval_loss": 0.12367700785398483,
+      "eval_runtime": 4.8461,
+      "eval_samples_per_second": 1402.788,
+      "eval_steps_per_second": 43.953,
       "step": 1194
     },
     {
       "epoch": 6.030037546933667,
+      "grad_norm": 1.7793939113616943,
+      "learning_rate": 1.3687150837988827e-05,
+      "loss": 0.2018,
       "step": 1200
     },
     {
       "epoch": 6.080100125156446,
+      "grad_norm": 2.2340643405914307,
+      "learning_rate": 1.3337988826815642e-05,
+      "loss": 0.1695,
       "step": 1210
     },
     {
       "epoch": 6.130162703379224,
+      "grad_norm": 1.621952772140503,
+      "learning_rate": 1.2988826815642458e-05,
+      "loss": 0.1791,
       "step": 1220
     },
     {
       "epoch": 6.180225281602002,
+      "grad_norm": 2.7437918186187744,
+      "learning_rate": 1.2639664804469276e-05,
+      "loss": 0.1956,
       "step": 1230
     },
     {
       "epoch": 6.230287859824781,
+      "grad_norm": 2.8166093826293945,
+      "learning_rate": 1.229050279329609e-05,
+      "loss": 0.1882,
       "step": 1240
     },
     {
       "epoch": 6.280350438047559,
+      "grad_norm": 1.636942982673645,
+      "learning_rate": 1.1941340782122906e-05,
+      "loss": 0.186,
       "step": 1250
     },
     {
       "epoch": 6.330413016270338,
+      "grad_norm": 2.7344369888305664,
+      "learning_rate": 1.1592178770949721e-05,
+      "loss": 0.1791,
       "step": 1260
     },
     {
       "epoch": 6.380475594493117,
+      "grad_norm": 2.310530662536621,
+      "learning_rate": 1.1243016759776537e-05,
+      "loss": 0.1743,
       "step": 1270
     },
     {
       "epoch": 6.430538172715895,
+      "grad_norm": 2.937742233276367,
+      "learning_rate": 1.0893854748603351e-05,
+      "loss": 0.1898,
       "step": 1280
     },
     {
       "epoch": 6.480600750938673,
+      "grad_norm": 2.7743396759033203,
+      "learning_rate": 1.054469273743017e-05,
+      "loss": 0.182,
       "step": 1290
     },
     {
       "epoch": 6.5306633291614515,
+      "grad_norm": 2.591944694519043,
+      "learning_rate": 1.0195530726256984e-05,
+      "loss": 0.1809,
       "step": 1300
     },
     {
       "epoch": 6.58072590738423,
+      "grad_norm": 2.280179977416992,
+      "learning_rate": 9.8463687150838e-06,
+      "loss": 0.1882,
       "step": 1310
     },
     {
       "epoch": 6.630788485607009,
+      "grad_norm": 2.2294774055480957,
+      "learning_rate": 9.497206703910614e-06,
+      "loss": 0.1624,
       "step": 1320
     },
     {
       "epoch": 6.680851063829787,
+      "grad_norm": 2.2053873538970947,
+      "learning_rate": 9.14804469273743e-06,
+      "loss": 0.1701,
       "step": 1330
     },
     {
       "epoch": 6.730913642052566,
+      "grad_norm": 1.1514060497283936,
+      "learning_rate": 8.798882681564247e-06,
+      "loss": 0.1709,
       "step": 1340
     },
     {
       "epoch": 6.7809762202753445,
+      "grad_norm": 1.625301718711853,
+      "learning_rate": 8.449720670391061e-06,
+      "loss": 0.1803,
       "step": 1350
     },
     {
       "epoch": 6.831038798498122,
+      "grad_norm": 1.733961582183838,
+      "learning_rate": 8.100558659217877e-06,
+      "loss": 0.1792,
       "step": 1360
     },
     {
       "epoch": 6.881101376720901,
+      "grad_norm": 2.374742269515991,
+      "learning_rate": 7.751396648044693e-06,
+      "loss": 0.1711,
       "step": 1370
     },
     {
       "epoch": 6.931163954943679,
+      "grad_norm": 2.3548731803894043,
+      "learning_rate": 7.4022346368715096e-06,
+      "loss": 0.1864,
       "step": 1380
     },
     {
       "epoch": 6.981226533166458,
+      "grad_norm": 1.3292882442474365,
+      "learning_rate": 7.053072625698325e-06,
+      "loss": 0.1632,
       "step": 1390
     },
     {
       "epoch": 6.996245306633291,
+      "eval_accuracy": 0.9767578699617535,
+      "eval_loss": 0.10951310396194458,
+      "eval_runtime": 5.2014,
+      "eval_samples_per_second": 1306.955,
+      "eval_steps_per_second": 40.95,
       "step": 1393
     },
     {
       "epoch": 7.035043804755945,
+      "grad_norm": 2.334541082382202,
+      "learning_rate": 6.70391061452514e-06,
+      "loss": 0.1726,
       "step": 1400
     },
     {
       "epoch": 7.085106382978723,
+      "grad_norm": 2.8630871772766113,
+      "learning_rate": 6.3547486033519555e-06,
+      "loss": 0.1775,
       "step": 1410
     },
     {
       "epoch": 7.135168961201502,
+      "grad_norm": 1.685805320739746,
+      "learning_rate": 6.005586592178771e-06,
+      "loss": 0.1886,
       "step": 1420
     },
     {
       "epoch": 7.18523153942428,
+      "grad_norm": 2.9282922744750977,
+      "learning_rate": 5.656424581005587e-06,
+      "loss": 0.1477,
       "step": 1430
     },
     {
       "epoch": 7.235294117647059,
+      "grad_norm": 2.2304320335388184,
+      "learning_rate": 5.307262569832402e-06,
+      "loss": 0.1692,
       "step": 1440
     },
     {
       "epoch": 7.2853566958698375,
+      "grad_norm": 2.634939193725586,
+      "learning_rate": 4.958100558659218e-06,
+      "loss": 0.1998,
       "step": 1450
     },
     {
       "epoch": 7.335419274092616,
+      "grad_norm": 2.268406629562378,
+      "learning_rate": 4.608938547486034e-06,
+      "loss": 0.1655,
       "step": 1460
     },
     {
       "epoch": 7.385481852315394,
+      "grad_norm": 1.96724534034729,
+      "learning_rate": 4.259776536312849e-06,
+      "loss": 0.1619,
       "step": 1470
     },
     {
       "epoch": 7.435544430538172,
+      "grad_norm": 2.0757083892822266,
+      "learning_rate": 3.910614525139665e-06,
+      "loss": 0.1676,
       "step": 1480
     },
     {
       "epoch": 7.485607008760951,
+      "grad_norm": 2.4496700763702393,
+      "learning_rate": 3.5614525139664804e-06,
+      "loss": 0.1558,
       "step": 1490
     },
     {
       "epoch": 7.53566958698373,
+      "grad_norm": 3.963124990463257,
+      "learning_rate": 3.212290502793296e-06,
+      "loss": 0.1703,
       "step": 1500
     },
     {
       "epoch": 7.585732165206508,
+      "grad_norm": 3.033352851867676,
+      "learning_rate": 2.863128491620112e-06,
+      "loss": 0.1643,
       "step": 1510
     },
     {
       "epoch": 7.635794743429287,
+      "grad_norm": 2.1735336780548096,
+      "learning_rate": 2.5139664804469276e-06,
+      "loss": 0.176,
       "step": 1520
     },
     {
       "epoch": 7.685857321652065,
+      "grad_norm": 2.0125844478607178,
+      "learning_rate": 2.1648044692737433e-06,
+      "loss": 0.1829,
       "step": 1530
     },
     {
       "epoch": 7.735919899874844,
+      "grad_norm": 1.9535727500915527,
+      "learning_rate": 1.815642458100559e-06,
+      "loss": 0.1864,
       "step": 1540
     },
     {
       "epoch": 7.785982478097622,
+      "grad_norm": 2.61956524848938,
+      "learning_rate": 1.4664804469273743e-06,
+      "loss": 0.1847,
       "step": 1550
     },
     {
       "epoch": 7.8360450563204,
+      "grad_norm": 1.4061027765274048,
+      "learning_rate": 1.11731843575419e-06,
+      "loss": 0.1786,
       "step": 1560
     },
     {
       "epoch": 7.886107634543179,
+      "grad_norm": 2.1694343090057373,
+      "learning_rate": 7.681564245810056e-07,
+      "loss": 0.1745,
       "step": 1570
     },
     {
       "epoch": 7.9361702127659575,
+      "grad_norm": 1.2118408679962158,
+      "learning_rate": 4.1899441340782126e-07,
+      "loss": 0.1668,
       "step": 1580
     },
     {
       "epoch": 7.986232790988736,
+      "grad_norm": 1.914755940437317,
+      "learning_rate": 6.983240223463688e-08,
+      "loss": 0.1642,
       "step": 1590
     },
     {
       "epoch": 7.996245306633291,
+      "eval_accuracy": 0.9773462783171522,
+      "eval_loss": 0.11222562193870544,
+      "eval_runtime": 5.6208,
+      "eval_samples_per_second": 1209.438,
+      "eval_steps_per_second": 37.895,
       "step": 1592
     },
     {
       "epoch": 7.996245306633291,
       "step": 1592,
       "total_flos": 3.777723239743488e+18,
+      "train_loss": 0.6284143277373745,
+      "train_runtime": 641.8193,
+      "train_samples_per_second": 636.865,
+      "train_steps_per_second": 2.48
     }
   ],
   "logging_steps": 10,