End of training

Browse files

Files changed (7) hide show

README.md +17 -4
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +2354 -0
wandb/run-20250214_113805-769lwzm2/files/output.log +158 -0
wandb/run-20250214_113805-769lwzm2/run-769lwzm2.wandb +2 -2

README.md CHANGED Viewed

@@ -3,20 +3,33 @@ library_name: transformers
 license: apache-2.0
 base_model: openai/whisper-base
 tags:
 - generated_from_trainer
 metrics:
 - wer
 model-index:
-- name: openai/whisper-base
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# openai/whisper-base
-This model is a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.2452
 - Wer: 13.8170

 license: apache-2.0
 base_model: openai/whisper-base
 tags:
+- whisper-event
 - generated_from_trainer
+datasets:
+- asierhv/composite_corpus_eu_v2.1
 metrics:
 - wer
 model-index:
+- name: Whisper Base Basque
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: asierhv/composite_corpus_eu_v2.1
+      type: asierhv/composite_corpus_eu_v2.1
+    metrics:
+    - name: Wer
+      type: wer
+      value: 13.816958025614658
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Whisper Base Basque
+This model is a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) on the asierhv/composite_corpus_eu_v2.1 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.2452
 - Wer: 13.8170

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 0.24521400034427643,
+    "eval_runtime": 74.5154,
+    "eval_samples_per_second": 28.236,
+    "eval_steps_per_second": 1.771,
+    "eval_wer": 13.816958025614658,
+    "total_flos": 1.660415901696e+19,
+    "train_loss": 0.22206098145246506,
+    "train_runtime": 4270.5513,
+    "train_samples_per_second": 59.945,
+    "train_steps_per_second": 1.873
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 0.24521400034427643,
+    "eval_runtime": 74.5154,
+    "eval_samples_per_second": 28.236,
+    "eval_steps_per_second": 1.771,
+    "eval_wer": 13.816958025614658
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 1.660415901696e+19,
+    "train_loss": 0.22206098145246506,
+    "train_runtime": 4270.5513,
+    "train_samples_per_second": 59.945,
+    "train_steps_per_second": 1.873
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,2354 @@

+{
+  "best_metric": 13.816958025614658,
+  "best_model_checkpoint": "./checkpoint-8000",
+  "epoch": 1.0,
+  "eval_steps": 1000,
+  "global_step": 8000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.003125,
+      "grad_norm": 21.98771095275879,
+      "learning_rate": 8.8e-07,
+      "loss": 3.1382,
+      "step": 25
+    },
+    {
+      "epoch": 0.00625,
+      "grad_norm": 11.417325019836426,
+      "learning_rate": 1.8800000000000002e-06,
+      "loss": 2.4524,
+      "step": 50
+    },
+    {
+      "epoch": 0.009375,
+      "grad_norm": 9.939048767089844,
+      "learning_rate": 2.88e-06,
+      "loss": 1.6386,
+      "step": 75
+    },
+    {
+      "epoch": 0.0125,
+      "grad_norm": 8.167191505432129,
+      "learning_rate": 3.88e-06,
+      "loss": 1.26,
+      "step": 100
+    },
+    {
+      "epoch": 0.015625,
+      "grad_norm": 7.114121913909912,
+      "learning_rate": 4.880000000000001e-06,
+      "loss": 1.0564,
+      "step": 125
+    },
+    {
+      "epoch": 0.01875,
+      "grad_norm": 6.8056840896606445,
+      "learning_rate": 5.8800000000000005e-06,
+      "loss": 0.897,
+      "step": 150
+    },
+    {
+      "epoch": 0.021875,
+      "grad_norm": 6.363353252410889,
+      "learning_rate": 6.88e-06,
+      "loss": 0.8252,
+      "step": 175
+    },
+    {
+      "epoch": 0.025,
+      "grad_norm": 6.622057914733887,
+      "learning_rate": 7.88e-06,
+      "loss": 0.7693,
+      "step": 200
+    },
+    {
+      "epoch": 0.028125,
+      "grad_norm": 5.045984745025635,
+      "learning_rate": 8.880000000000001e-06,
+      "loss": 0.6621,
+      "step": 225
+    },
+    {
+      "epoch": 0.03125,
+      "grad_norm": 7.978261947631836,
+      "learning_rate": 9.88e-06,
+      "loss": 0.6861,
+      "step": 250
+    },
+    {
+      "epoch": 0.034375,
+      "grad_norm": 6.535711288452148,
+      "learning_rate": 1.0880000000000001e-05,
+      "loss": 0.6888,
+      "step": 275
+    },
+    {
+      "epoch": 0.0375,
+      "grad_norm": 6.781430721282959,
+      "learning_rate": 1.188e-05,
+      "loss": 0.6648,
+      "step": 300
+    },
+    {
+      "epoch": 0.040625,
+      "grad_norm": 5.826904773712158,
+      "learning_rate": 1.2880000000000002e-05,
+      "loss": 0.5983,
+      "step": 325
+    },
+    {
+      "epoch": 0.04375,
+      "grad_norm": 5.830564975738525,
+      "learning_rate": 1.3880000000000001e-05,
+      "loss": 0.5272,
+      "step": 350
+    },
+    {
+      "epoch": 0.046875,
+      "grad_norm": 5.638543128967285,
+      "learning_rate": 1.4880000000000002e-05,
+      "loss": 0.4479,
+      "step": 375
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 4.451868057250977,
+      "learning_rate": 1.588e-05,
+      "loss": 0.4341,
+      "step": 400
+    },
+    {
+      "epoch": 0.053125,
+      "grad_norm": 4.475216865539551,
+      "learning_rate": 1.688e-05,
+      "loss": 0.3916,
+      "step": 425
+    },
+    {
+      "epoch": 0.05625,
+      "grad_norm": 4.683038711547852,
+      "learning_rate": 1.788e-05,
+      "loss": 0.3745,
+      "step": 450
+    },
+    {
+      "epoch": 0.059375,
+      "grad_norm": 4.93367862701416,
+      "learning_rate": 1.8880000000000002e-05,
+      "loss": 0.3569,
+      "step": 475
+    },
+    {
+      "epoch": 0.0625,
+      "grad_norm": 4.0592041015625,
+      "learning_rate": 1.9880000000000003e-05,
+      "loss": 0.3336,
+      "step": 500
+    },
+    {
+      "epoch": 0.065625,
+      "grad_norm": 5.144535064697266,
+      "learning_rate": 1.9941333333333335e-05,
+      "loss": 0.3092,
+      "step": 525
+    },
+    {
+      "epoch": 0.06875,
+      "grad_norm": 4.806638717651367,
+      "learning_rate": 1.9874666666666668e-05,
+      "loss": 0.3106,
+      "step": 550
+    },
+    {
+      "epoch": 0.071875,
+      "grad_norm": 4.3809638023376465,
+      "learning_rate": 1.9808e-05,
+      "loss": 0.2971,
+      "step": 575
+    },
+    {
+      "epoch": 0.075,
+      "grad_norm": 5.35611629486084,
+      "learning_rate": 1.9741333333333334e-05,
+      "loss": 0.2897,
+      "step": 600
+    },
+    {
+      "epoch": 0.078125,
+      "grad_norm": 4.62730598449707,
+      "learning_rate": 1.967466666666667e-05,
+      "loss": 0.2607,
+      "step": 625
+    },
+    {
+      "epoch": 0.08125,
+      "grad_norm": 3.60905122756958,
+      "learning_rate": 1.9608000000000003e-05,
+      "loss": 0.2765,
+      "step": 650
+    },
+    {
+      "epoch": 0.084375,
+      "grad_norm": 3.7476887702941895,
+      "learning_rate": 1.9541333333333336e-05,
+      "loss": 0.2661,
+      "step": 675
+    },
+    {
+      "epoch": 0.0875,
+      "grad_norm": 4.03351354598999,
+      "learning_rate": 1.947466666666667e-05,
+      "loss": 0.271,
+      "step": 700
+    },
+    {
+      "epoch": 0.090625,
+      "grad_norm": 6.19671106338501,
+      "learning_rate": 1.9408e-05,
+      "loss": 0.3045,
+      "step": 725
+    },
+    {
+      "epoch": 0.09375,
+      "grad_norm": 6.079224586486816,
+      "learning_rate": 1.9341333333333334e-05,
+      "loss": 0.4168,
+      "step": 750
+    },
+    {
+      "epoch": 0.096875,
+      "grad_norm": 5.619544982910156,
+      "learning_rate": 1.9274666666666667e-05,
+      "loss": 0.4038,
+      "step": 775
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 5.917449951171875,
+      "learning_rate": 1.9208000000000003e-05,
+      "loss": 0.4418,
+      "step": 800
+    },
+    {
+      "epoch": 0.103125,
+      "grad_norm": 3.9295430183410645,
+      "learning_rate": 1.9141333333333333e-05,
+      "loss": 0.3051,
+      "step": 825
+    },
+    {
+      "epoch": 0.10625,
+      "grad_norm": 3.26605486869812,
+      "learning_rate": 1.907466666666667e-05,
+      "loss": 0.2447,
+      "step": 850
+    },
+    {
+      "epoch": 0.109375,
+      "grad_norm": 4.2773051261901855,
+      "learning_rate": 1.9008e-05,
+      "loss": 0.2316,
+      "step": 875
+    },
+    {
+      "epoch": 0.1125,
+      "grad_norm": 5.690479755401611,
+      "learning_rate": 1.8941333333333334e-05,
+      "loss": 0.3193,
+      "step": 900
+    },
+    {
+      "epoch": 0.115625,
+      "grad_norm": 5.861849308013916,
+      "learning_rate": 1.8874666666666667e-05,
+      "loss": 0.3742,
+      "step": 925
+    },
+    {
+      "epoch": 0.11875,
+      "grad_norm": 5.605452537536621,
+      "learning_rate": 1.8808e-05,
+      "loss": 0.3622,
+      "step": 950
+    },
+    {
+      "epoch": 0.121875,
+      "grad_norm": 6.191706657409668,
+      "learning_rate": 1.8741333333333336e-05,
+      "loss": 0.5693,
+      "step": 975
+    },
+    {
+      "epoch": 0.125,
+      "grad_norm": 6.441635608673096,
+      "learning_rate": 1.867466666666667e-05,
+      "loss": 0.4951,
+      "step": 1000
+    },
+    {
+      "epoch": 0.125,
+      "eval_loss": 0.49006596207618713,
+      "eval_runtime": 77.9711,
+      "eval_samples_per_second": 26.984,
+      "eval_steps_per_second": 1.693,
+      "eval_wer": 27.05431429372721,
+      "step": 1000
+    },
+    {
+      "epoch": 0.128125,
+      "grad_norm": 5.985304832458496,
+      "learning_rate": 1.8608000000000002e-05,
+      "loss": 0.4113,
+      "step": 1025
+    },
+    {
+      "epoch": 0.13125,
+      "grad_norm": 5.439868450164795,
+      "learning_rate": 1.8541333333333335e-05,
+      "loss": 0.3116,
+      "step": 1050
+    },
+    {
+      "epoch": 0.134375,
+      "grad_norm": 3.6065237522125244,
+      "learning_rate": 1.8474666666666668e-05,
+      "loss": 0.223,
+      "step": 1075
+    },
+    {
+      "epoch": 0.1375,
+      "grad_norm": 3.4091665744781494,
+      "learning_rate": 1.8408e-05,
+      "loss": 0.1918,
+      "step": 1100
+    },
+    {
+      "epoch": 0.140625,
+      "grad_norm": 2.80391001701355,
+      "learning_rate": 1.8341333333333337e-05,
+      "loss": 0.1784,
+      "step": 1125
+    },
+    {
+      "epoch": 0.14375,
+      "grad_norm": 3.194566011428833,
+      "learning_rate": 1.8274666666666666e-05,
+      "loss": 0.1988,
+      "step": 1150
+    },
+    {
+      "epoch": 0.146875,
+      "grad_norm": 3.294611930847168,
+      "learning_rate": 1.8208000000000003e-05,
+      "loss": 0.2032,
+      "step": 1175
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 3.653074264526367,
+      "learning_rate": 1.8141333333333335e-05,
+      "loss": 0.1914,
+      "step": 1200
+    },
+    {
+      "epoch": 0.153125,
+      "grad_norm": 4.978360176086426,
+      "learning_rate": 1.8074666666666668e-05,
+      "loss": 0.2865,
+      "step": 1225
+    },
+    {
+      "epoch": 0.15625,
+      "grad_norm": 5.2725701332092285,
+      "learning_rate": 1.8008e-05,
+      "loss": 0.306,
+      "step": 1250
+    },
+    {
+      "epoch": 0.159375,
+      "grad_norm": 5.122636318206787,
+      "learning_rate": 1.7941333333333334e-05,
+      "loss": 0.3283,
+      "step": 1275
+    },
+    {
+      "epoch": 0.1625,
+      "grad_norm": 5.599034786224365,
+      "learning_rate": 1.787466666666667e-05,
+      "loss": 0.3472,
+      "step": 1300
+    },
+    {
+      "epoch": 0.165625,
+      "grad_norm": 5.392984867095947,
+      "learning_rate": 1.7808e-05,
+      "loss": 0.3073,
+      "step": 1325
+    },
+    {
+      "epoch": 0.16875,
+      "grad_norm": 4.690824508666992,
+      "learning_rate": 1.7741333333333336e-05,
+      "loss": 0.3096,
+      "step": 1350
+    },
+    {
+      "epoch": 0.171875,
+      "grad_norm": 5.487542152404785,
+      "learning_rate": 1.767466666666667e-05,
+      "loss": 0.2872,
+      "step": 1375
+    },
+    {
+      "epoch": 0.175,
+      "grad_norm": 5.0103440284729,
+      "learning_rate": 1.7608e-05,
+      "loss": 0.28,
+      "step": 1400
+    },
+    {
+      "epoch": 0.178125,
+      "grad_norm": 4.374607563018799,
+      "learning_rate": 1.7541333333333334e-05,
+      "loss": 0.2907,
+      "step": 1425
+    },
+    {
+      "epoch": 0.18125,
+      "grad_norm": 3.8199336528778076,
+      "learning_rate": 1.7474666666666667e-05,
+      "loss": 0.2721,
+      "step": 1450
+    },
+    {
+      "epoch": 0.184375,
+      "grad_norm": 3.263697862625122,
+      "learning_rate": 1.7408e-05,
+      "loss": 0.2018,
+      "step": 1475
+    },
+    {
+      "epoch": 0.1875,
+      "grad_norm": 3.686453104019165,
+      "learning_rate": 1.7341333333333333e-05,
+      "loss": 0.1804,
+      "step": 1500
+    },
+    {
+      "epoch": 0.190625,
+      "grad_norm": 3.122502088546753,
+      "learning_rate": 1.727466666666667e-05,
+      "loss": 0.1864,
+      "step": 1525
+    },
+    {
+      "epoch": 0.19375,
+      "grad_norm": 3.8203911781311035,
+      "learning_rate": 1.7208000000000002e-05,
+      "loss": 0.1822,
+      "step": 1550
+    },
+    {
+      "epoch": 0.196875,
+      "grad_norm": 3.1615259647369385,
+      "learning_rate": 1.7141333333333335e-05,
+      "loss": 0.1607,
+      "step": 1575
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 3.136589288711548,
+      "learning_rate": 1.7074666666666668e-05,
+      "loss": 0.1681,
+      "step": 1600
+    },
+    {
+      "epoch": 0.203125,
+      "grad_norm": 5.022261142730713,
+      "learning_rate": 1.7008000000000004e-05,
+      "loss": 0.2476,
+      "step": 1625
+    },
+    {
+      "epoch": 0.20625,
+      "grad_norm": 4.3968377113342285,
+      "learning_rate": 1.6941333333333333e-05,
+      "loss": 0.2578,
+      "step": 1650
+    },
+    {
+      "epoch": 0.209375,
+      "grad_norm": 5.124329090118408,
+      "learning_rate": 1.687466666666667e-05,
+      "loss": 0.2646,
+      "step": 1675
+    },
+    {
+      "epoch": 0.2125,
+      "grad_norm": 3.7056658267974854,
+      "learning_rate": 1.6808000000000002e-05,
+      "loss": 0.251,
+      "step": 1700
+    },
+    {
+      "epoch": 0.215625,
+      "grad_norm": 3.477151870727539,
+      "learning_rate": 1.6741333333333335e-05,
+      "loss": 0.1996,
+      "step": 1725
+    },
+    {
+      "epoch": 0.21875,
+      "grad_norm": 3.3584837913513184,
+      "learning_rate": 1.6674666666666668e-05,
+      "loss": 0.1939,
+      "step": 1750
+    },
+    {
+      "epoch": 0.221875,
+      "grad_norm": 3.136394739151001,
+      "learning_rate": 1.6608e-05,
+      "loss": 0.1664,
+      "step": 1775
+    },
+    {
+      "epoch": 0.225,
+      "grad_norm": 2.997995376586914,
+      "learning_rate": 1.6541333333333334e-05,
+      "loss": 0.1731,
+      "step": 1800
+    },
+    {
+      "epoch": 0.228125,
+      "grad_norm": 3.235027551651001,
+      "learning_rate": 1.6474666666666667e-05,
+      "loss": 0.1609,
+      "step": 1825
+    },
+    {
+      "epoch": 0.23125,
+      "grad_norm": 2.657120704650879,
+      "learning_rate": 1.6408000000000003e-05,
+      "loss": 0.1563,
+      "step": 1850
+    },
+    {
+      "epoch": 0.234375,
+      "grad_norm": 3.5700979232788086,
+      "learning_rate": 1.6341333333333336e-05,
+      "loss": 0.1554,
+      "step": 1875
+    },
+    {
+      "epoch": 0.2375,
+      "grad_norm": 3.4939069747924805,
+      "learning_rate": 1.627466666666667e-05,
+      "loss": 0.1553,
+      "step": 1900
+    },
+    {
+      "epoch": 0.240625,
+      "grad_norm": 2.4021010398864746,
+      "learning_rate": 1.6208e-05,
+      "loss": 0.1494,
+      "step": 1925
+    },
+    {
+      "epoch": 0.24375,
+      "grad_norm": 4.335009574890137,
+      "learning_rate": 1.6141333333333334e-05,
+      "loss": 0.2011,
+      "step": 1950
+    },
+    {
+      "epoch": 0.246875,
+      "grad_norm": 4.650504112243652,
+      "learning_rate": 1.6074666666666667e-05,
+      "loss": 0.2702,
+      "step": 1975
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 4.410916328430176,
+      "learning_rate": 1.6008e-05,
+      "loss": 0.2607,
+      "step": 2000
+    },
+    {
+      "epoch": 0.25,
+      "eval_loss": 0.3708300292491913,
+      "eval_runtime": 74.5053,
+      "eval_samples_per_second": 28.24,
+      "eval_steps_per_second": 1.772,
+      "eval_wer": 19.865382817612414,
+      "step": 2000
+    },
+    {
+      "epoch": 0.253125,
+      "grad_norm": 3.3271749019622803,
+      "learning_rate": 1.5941333333333336e-05,
+      "loss": 0.2175,
+      "step": 2025
+    },
+    {
+      "epoch": 0.25625,
+      "grad_norm": 3.365081310272217,
+      "learning_rate": 1.5874666666666666e-05,
+      "loss": 0.1527,
+      "step": 2050
+    },
+    {
+      "epoch": 0.259375,
+      "grad_norm": 3.701395273208618,
+      "learning_rate": 1.5808000000000002e-05,
+      "loss": 0.1461,
+      "step": 2075
+    },
+    {
+      "epoch": 0.2625,
+      "grad_norm": 2.8837661743164062,
+      "learning_rate": 1.5741333333333335e-05,
+      "loss": 0.1507,
+      "step": 2100
+    },
+    {
+      "epoch": 0.265625,
+      "grad_norm": 3.2435319423675537,
+      "learning_rate": 1.5674666666666667e-05,
+      "loss": 0.1314,
+      "step": 2125
+    },
+    {
+      "epoch": 0.26875,
+      "grad_norm": 2.9637367725372314,
+      "learning_rate": 1.5608e-05,
+      "loss": 0.1292,
+      "step": 2150
+    },
+    {
+      "epoch": 0.271875,
+      "grad_norm": 3.535871982574463,
+      "learning_rate": 1.5541333333333337e-05,
+      "loss": 0.1332,
+      "step": 2175
+    },
+    {
+      "epoch": 0.275,
+      "grad_norm": 3.633970022201538,
+      "learning_rate": 1.547466666666667e-05,
+      "loss": 0.2285,
+      "step": 2200
+    },
+    {
+      "epoch": 0.278125,
+      "grad_norm": 4.483553409576416,
+      "learning_rate": 1.5408000000000002e-05,
+      "loss": 0.2325,
+      "step": 2225
+    },
+    {
+      "epoch": 0.28125,
+      "grad_norm": 4.577600955963135,
+      "learning_rate": 1.5341333333333335e-05,
+      "loss": 0.256,
+      "step": 2250
+    },
+    {
+      "epoch": 0.284375,
+      "grad_norm": 3.32087779045105,
+      "learning_rate": 1.5274666666666668e-05,
+      "loss": 0.1758,
+      "step": 2275
+    },
+    {
+      "epoch": 0.2875,
+      "grad_norm": 3.1856462955474854,
+      "learning_rate": 1.5208e-05,
+      "loss": 0.1311,
+      "step": 2300
+    },
+    {
+      "epoch": 0.290625,
+      "grad_norm": 3.373046636581421,
+      "learning_rate": 1.5141333333333335e-05,
+      "loss": 0.1197,
+      "step": 2325
+    },
+    {
+      "epoch": 0.29375,
+      "grad_norm": 3.019298553466797,
+      "learning_rate": 1.5074666666666668e-05,
+      "loss": 0.1243,
+      "step": 2350
+    },
+    {
+      "epoch": 0.296875,
+      "grad_norm": 2.5749433040618896,
+      "learning_rate": 1.5008000000000001e-05,
+      "loss": 0.1302,
+      "step": 2375
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 4.252561092376709,
+      "learning_rate": 1.4941333333333334e-05,
+      "loss": 0.1277,
+      "step": 2400
+    },
+    {
+      "epoch": 0.303125,
+      "grad_norm": 2.421847343444824,
+      "learning_rate": 1.4874666666666668e-05,
+      "loss": 0.1352,
+      "step": 2425
+    },
+    {
+      "epoch": 0.30625,
+      "grad_norm": 2.247629165649414,
+      "learning_rate": 1.4808e-05,
+      "loss": 0.1286,
+      "step": 2450
+    },
+    {
+      "epoch": 0.309375,
+      "grad_norm": 2.594142436981201,
+      "learning_rate": 1.4741333333333334e-05,
+      "loss": 0.1355,
+      "step": 2475
+    },
+    {
+      "epoch": 0.3125,
+      "grad_norm": 3.3857474327087402,
+      "learning_rate": 1.4674666666666669e-05,
+      "loss": 0.1539,
+      "step": 2500
+    },
+    {
+      "epoch": 0.315625,
+      "grad_norm": 3.906268835067749,
+      "learning_rate": 1.4608000000000001e-05,
+      "loss": 0.2039,
+      "step": 2525
+    },
+    {
+      "epoch": 0.31875,
+      "grad_norm": 4.0277204513549805,
+      "learning_rate": 1.4541333333333334e-05,
+      "loss": 0.1978,
+      "step": 2550
+    },
+    {
+      "epoch": 0.321875,
+      "grad_norm": 3.9858391284942627,
+      "learning_rate": 1.4474666666666669e-05,
+      "loss": 0.2316,
+      "step": 2575
+    },
+    {
+      "epoch": 0.325,
+      "grad_norm": 2.84332537651062,
+      "learning_rate": 1.4408000000000002e-05,
+      "loss": 0.1719,
+      "step": 2600
+    },
+    {
+      "epoch": 0.328125,
+      "grad_norm": 3.294312000274658,
+      "learning_rate": 1.4341333333333334e-05,
+      "loss": 0.1421,
+      "step": 2625
+    },
+    {
+      "epoch": 0.33125,
+      "grad_norm": 2.738583564758301,
+      "learning_rate": 1.4274666666666667e-05,
+      "loss": 0.1297,
+      "step": 2650
+    },
+    {
+      "epoch": 0.334375,
+      "grad_norm": 2.4573464393615723,
+      "learning_rate": 1.4208000000000002e-05,
+      "loss": 0.1223,
+      "step": 2675
+    },
+    {
+      "epoch": 0.3375,
+      "grad_norm": 2.678255319595337,
+      "learning_rate": 1.4141333333333333e-05,
+      "loss": 0.1249,
+      "step": 2700
+    },
+    {
+      "epoch": 0.340625,
+      "grad_norm": 3.8852200508117676,
+      "learning_rate": 1.4074666666666668e-05,
+      "loss": 0.1456,
+      "step": 2725
+    },
+    {
+      "epoch": 0.34375,
+      "grad_norm": 3.630040407180786,
+      "learning_rate": 1.4008000000000002e-05,
+      "loss": 0.1466,
+      "step": 2750
+    },
+    {
+      "epoch": 0.346875,
+      "grad_norm": 4.463013648986816,
+      "learning_rate": 1.3941333333333333e-05,
+      "loss": 0.2719,
+      "step": 2775
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 4.058387279510498,
+      "learning_rate": 1.3874666666666668e-05,
+      "loss": 0.244,
+      "step": 2800
+    },
+    {
+      "epoch": 0.353125,
+      "grad_norm": 4.477724552154541,
+      "learning_rate": 1.3808e-05,
+      "loss": 0.2144,
+      "step": 2825
+    },
+    {
+      "epoch": 0.35625,
+      "grad_norm": 3.583326578140259,
+      "learning_rate": 1.3741333333333335e-05,
+      "loss": 0.2157,
+      "step": 2850
+    },
+    {
+      "epoch": 0.359375,
+      "grad_norm": 3.8221640586853027,
+      "learning_rate": 1.3674666666666668e-05,
+      "loss": 0.2151,
+      "step": 2875
+    },
+    {
+      "epoch": 0.3625,
+      "grad_norm": 3.6878933906555176,
+      "learning_rate": 1.3608e-05,
+      "loss": 0.2063,
+      "step": 2900
+    },
+    {
+      "epoch": 0.365625,
+      "grad_norm": 3.0994338989257812,
+      "learning_rate": 1.3541333333333335e-05,
+      "loss": 0.1613,
+      "step": 2925
+    },
+    {
+      "epoch": 0.36875,
+      "grad_norm": 2.727104902267456,
+      "learning_rate": 1.3474666666666667e-05,
+      "loss": 0.1278,
+      "step": 2950
+    },
+    {
+      "epoch": 0.371875,
+      "grad_norm": 3.0303940773010254,
+      "learning_rate": 1.3408000000000001e-05,
+      "loss": 0.1347,
+      "step": 2975
+    },
+    {
+      "epoch": 0.375,
+      "grad_norm": 3.6327948570251465,
+      "learning_rate": 1.3341333333333336e-05,
+      "loss": 0.1887,
+      "step": 3000
+    },
+    {
+      "epoch": 0.375,
+      "eval_loss": 0.34539544582366943,
+      "eval_runtime": 75.5211,
+      "eval_samples_per_second": 27.86,
+      "eval_steps_per_second": 1.748,
+      "eval_wer": 18.39768159296999,
+      "step": 3000
+    },
+    {
+      "epoch": 0.378125,
+      "grad_norm": 4.803223133087158,
+      "learning_rate": 1.3274666666666667e-05,
+      "loss": 0.2279,
+      "step": 3025
+    },
+    {
+      "epoch": 0.38125,
+      "grad_norm": 4.9647536277771,
+      "learning_rate": 1.3208000000000001e-05,
+      "loss": 0.2556,
+      "step": 3050
+    },
+    {
+      "epoch": 0.384375,
+      "grad_norm": 3.8229358196258545,
+      "learning_rate": 1.3141333333333334e-05,
+      "loss": 0.1795,
+      "step": 3075
+    },
+    {
+      "epoch": 0.3875,
+      "grad_norm": 3.2917191982269287,
+      "learning_rate": 1.3074666666666669e-05,
+      "loss": 0.1355,
+      "step": 3100
+    },
+    {
+      "epoch": 0.390625,
+      "grad_norm": 2.797985076904297,
+      "learning_rate": 1.3008e-05,
+      "loss": 0.1243,
+      "step": 3125
+    },
+    {
+      "epoch": 0.39375,
+      "grad_norm": 3.5542221069335938,
+      "learning_rate": 1.2941333333333334e-05,
+      "loss": 0.1494,
+      "step": 3150
+    },
+    {
+      "epoch": 0.396875,
+      "grad_norm": 3.7407355308532715,
+      "learning_rate": 1.2874666666666669e-05,
+      "loss": 0.2271,
+      "step": 3175
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 4.250736713409424,
+      "learning_rate": 1.2808e-05,
+      "loss": 0.2051,
+      "step": 3200
+    },
+    {
+      "epoch": 0.403125,
+      "grad_norm": 4.248854160308838,
+      "learning_rate": 1.2741333333333335e-05,
+      "loss": 0.2251,
+      "step": 3225
+    },
+    {
+      "epoch": 0.40625,
+      "grad_norm": 3.6842331886291504,
+      "learning_rate": 1.2674666666666669e-05,
+      "loss": 0.1806,
+      "step": 3250
+    },
+    {
+      "epoch": 0.409375,
+      "grad_norm": 4.215803623199463,
+      "learning_rate": 1.2608e-05,
+      "loss": 0.1938,
+      "step": 3275
+    },
+    {
+      "epoch": 0.4125,
+      "grad_norm": 2.878873348236084,
+      "learning_rate": 1.2541333333333335e-05,
+      "loss": 0.211,
+      "step": 3300
+    },
+    {
+      "epoch": 0.415625,
+      "grad_norm": 3.9587783813476562,
+      "learning_rate": 1.2474666666666668e-05,
+      "loss": 0.1601,
+      "step": 3325
+    },
+    {
+      "epoch": 0.41875,
+      "grad_norm": 3.0218279361724854,
+      "learning_rate": 1.2408e-05,
+      "loss": 0.1236,
+      "step": 3350
+    },
+    {
+      "epoch": 0.421875,
+      "grad_norm": 2.6972806453704834,
+      "learning_rate": 1.2341333333333333e-05,
+      "loss": 0.1112,
+      "step": 3375
+    },
+    {
+      "epoch": 0.425,
+      "grad_norm": 2.4185972213745117,
+      "learning_rate": 1.2274666666666668e-05,
+      "loss": 0.1212,
+      "step": 3400
+    },
+    {
+      "epoch": 0.428125,
+      "grad_norm": 3.9205973148345947,
+      "learning_rate": 1.2208000000000002e-05,
+      "loss": 0.1225,
+      "step": 3425
+    },
+    {
+      "epoch": 0.43125,
+      "grad_norm": 2.392932176589966,
+      "learning_rate": 1.2141333333333334e-05,
+      "loss": 0.1167,
+      "step": 3450
+    },
+    {
+      "epoch": 0.434375,
+      "grad_norm": 4.024794101715088,
+      "learning_rate": 1.2074666666666668e-05,
+      "loss": 0.1057,
+      "step": 3475
+    },
+    {
+      "epoch": 0.4375,
+      "grad_norm": 3.367401599884033,
+      "learning_rate": 1.2008000000000003e-05,
+      "loss": 0.1309,
+      "step": 3500
+    },
+    {
+      "epoch": 0.440625,
+      "grad_norm": 2.7556755542755127,
+      "learning_rate": 1.1941333333333334e-05,
+      "loss": 0.1235,
+      "step": 3525
+    },
+    {
+      "epoch": 0.44375,
+      "grad_norm": 3.158759117126465,
+      "learning_rate": 1.1874666666666668e-05,
+      "loss": 0.1233,
+      "step": 3550
+    },
+    {
+      "epoch": 0.446875,
+      "grad_norm": 4.0478010177612305,
+      "learning_rate": 1.1808000000000001e-05,
+      "loss": 0.1892,
+      "step": 3575
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 3.5508739948272705,
+      "learning_rate": 1.1741333333333334e-05,
+      "loss": 0.2186,
+      "step": 3600
+    },
+    {
+      "epoch": 0.453125,
+      "grad_norm": 3.6009671688079834,
+      "learning_rate": 1.1674666666666667e-05,
+      "loss": 0.2093,
+      "step": 3625
+    },
+    {
+      "epoch": 0.45625,
+      "grad_norm": 2.172722578048706,
+      "learning_rate": 1.1608000000000001e-05,
+      "loss": 0.1282,
+      "step": 3650
+    },
+    {
+      "epoch": 0.459375,
+      "grad_norm": 2.729567050933838,
+      "learning_rate": 1.1541333333333332e-05,
+      "loss": 0.1096,
+      "step": 3675
+    },
+    {
+      "epoch": 0.4625,
+      "grad_norm": 2.7863428592681885,
+      "learning_rate": 1.1474666666666667e-05,
+      "loss": 0.1115,
+      "step": 3700
+    },
+    {
+      "epoch": 0.465625,
+      "grad_norm": 2.7164411544799805,
+      "learning_rate": 1.1408000000000002e-05,
+      "loss": 0.1365,
+      "step": 3725
+    },
+    {
+      "epoch": 0.46875,
+      "grad_norm": 3.918790340423584,
+      "learning_rate": 1.1341333333333336e-05,
+      "loss": 0.1759,
+      "step": 3750
+    },
+    {
+      "epoch": 0.471875,
+      "grad_norm": 3.520095109939575,
+      "learning_rate": 1.1274666666666667e-05,
+      "loss": 0.2138,
+      "step": 3775
+    },
+    {
+      "epoch": 0.475,
+      "grad_norm": 4.172083854675293,
+      "learning_rate": 1.1208000000000002e-05,
+      "loss": 0.2189,
+      "step": 3800
+    },
+    {
+      "epoch": 0.478125,
+      "grad_norm": 4.076236724853516,
+      "learning_rate": 1.1141333333333335e-05,
+      "loss": 0.2079,
+      "step": 3825
+    },
+    {
+      "epoch": 0.48125,
+      "grad_norm": 4.950024604797363,
+      "learning_rate": 1.1074666666666667e-05,
+      "loss": 0.2233,
+      "step": 3850
+    },
+    {
+      "epoch": 0.484375,
+      "grad_norm": 3.6588923931121826,
+      "learning_rate": 1.1008e-05,
+      "loss": 0.2011,
+      "step": 3875
+    },
+    {
+      "epoch": 0.4875,
+      "grad_norm": 2.7259421348571777,
+      "learning_rate": 1.0941333333333335e-05,
+      "loss": 0.1419,
+      "step": 3900
+    },
+    {
+      "epoch": 0.490625,
+      "grad_norm": 4.260537147521973,
+      "learning_rate": 1.0874666666666666e-05,
+      "loss": 0.1221,
+      "step": 3925
+    },
+    {
+      "epoch": 0.49375,
+      "grad_norm": 2.9953160285949707,
+      "learning_rate": 1.0808e-05,
+      "loss": 0.1192,
+      "step": 3950
+    },
+    {
+      "epoch": 0.496875,
+      "grad_norm": 5.537333011627197,
+      "learning_rate": 1.0741333333333335e-05,
+      "loss": 0.2057,
+      "step": 3975
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 4.265567302703857,
+      "learning_rate": 1.0674666666666666e-05,
+      "loss": 0.2607,
+      "step": 4000
+    },
+    {
+      "epoch": 0.5,
+      "eval_loss": 0.3218025863170624,
+      "eval_runtime": 75.7693,
+      "eval_samples_per_second": 27.768,
+      "eval_steps_per_second": 1.742,
+      "eval_wer": 16.808450967560997,
+      "step": 4000
+    },
+    {
+      "epoch": 0.503125,
+      "grad_norm": 2.978968620300293,
+      "learning_rate": 1.0608e-05,
+      "loss": 0.1951,
+      "step": 4025
+    },
+    {
+      "epoch": 0.50625,
+      "grad_norm": 5.042616367340088,
+      "learning_rate": 1.0541333333333335e-05,
+      "loss": 0.2567,
+      "step": 4050
+    },
+    {
+      "epoch": 0.509375,
+      "grad_norm": 4.18173885345459,
+      "learning_rate": 1.0474666666666668e-05,
+      "loss": 0.1932,
+      "step": 4075
+    },
+    {
+      "epoch": 0.5125,
+      "grad_norm": 3.0428967475891113,
+      "learning_rate": 1.0408000000000001e-05,
+      "loss": 0.1743,
+      "step": 4100
+    },
+    {
+      "epoch": 0.515625,
+      "grad_norm": 2.8713204860687256,
+      "learning_rate": 1.0341333333333334e-05,
+      "loss": 0.1261,
+      "step": 4125
+    },
+    {
+      "epoch": 0.51875,
+      "grad_norm": 2.912363052368164,
+      "learning_rate": 1.0274666666666668e-05,
+      "loss": 0.1112,
+      "step": 4150
+    },
+    {
+      "epoch": 0.521875,
+      "grad_norm": 2.445664167404175,
+      "learning_rate": 1.0208e-05,
+      "loss": 0.1133,
+      "step": 4175
+    },
+    {
+      "epoch": 0.525,
+      "grad_norm": 2.2317187786102295,
+      "learning_rate": 1.0141333333333334e-05,
+      "loss": 0.106,
+      "step": 4200
+    },
+    {
+      "epoch": 0.528125,
+      "grad_norm": 2.4223077297210693,
+      "learning_rate": 1.0074666666666669e-05,
+      "loss": 0.1142,
+      "step": 4225
+    },
+    {
+      "epoch": 0.53125,
+      "grad_norm": 2.8847713470458984,
+      "learning_rate": 1.0008e-05,
+      "loss": 0.1158,
+      "step": 4250
+    },
+    {
+      "epoch": 0.534375,
+      "grad_norm": 3.4072630405426025,
+      "learning_rate": 9.941333333333334e-06,
+      "loss": 0.1283,
+      "step": 4275
+    },
+    {
+      "epoch": 0.5375,
+      "grad_norm": 4.455233573913574,
+      "learning_rate": 9.874666666666669e-06,
+      "loss": 0.1867,
+      "step": 4300
+    },
+    {
+      "epoch": 0.540625,
+      "grad_norm": 3.465684175491333,
+      "learning_rate": 9.808000000000002e-06,
+      "loss": 0.1681,
+      "step": 4325
+    },
+    {
+      "epoch": 0.54375,
+      "grad_norm": 3.8950164318084717,
+      "learning_rate": 9.741333333333334e-06,
+      "loss": 0.1812,
+      "step": 4350
+    },
+    {
+      "epoch": 0.546875,
+      "grad_norm": 2.216827630996704,
+      "learning_rate": 9.674666666666667e-06,
+      "loss": 0.1069,
+      "step": 4375
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 2.3940842151641846,
+      "learning_rate": 9.608e-06,
+      "loss": 0.095,
+      "step": 4400
+    },
+    {
+      "epoch": 0.553125,
+      "grad_norm": 2.4614291191101074,
+      "learning_rate": 9.541333333333335e-06,
+      "loss": 0.1019,
+      "step": 4425
+    },
+    {
+      "epoch": 0.55625,
+      "grad_norm": 2.891763925552368,
+      "learning_rate": 9.474666666666668e-06,
+      "loss": 0.12,
+      "step": 4450
+    },
+    {
+      "epoch": 0.559375,
+      "grad_norm": 2.791774272918701,
+      "learning_rate": 9.408e-06,
+      "loss": 0.1268,
+      "step": 4475
+    },
+    {
+      "epoch": 0.5625,
+      "grad_norm": 2.9557909965515137,
+      "learning_rate": 9.341333333333335e-06,
+      "loss": 0.1164,
+      "step": 4500
+    },
+    {
+      "epoch": 0.565625,
+      "grad_norm": 3.381051540374756,
+      "learning_rate": 9.274666666666668e-06,
+      "loss": 0.1464,
+      "step": 4525
+    },
+    {
+      "epoch": 0.56875,
+      "grad_norm": 3.789724588394165,
+      "learning_rate": 9.208e-06,
+      "loss": 0.1947,
+      "step": 4550
+    },
+    {
+      "epoch": 0.571875,
+      "grad_norm": 3.7860305309295654,
+      "learning_rate": 9.141333333333333e-06,
+      "loss": 0.1897,
+      "step": 4575
+    },
+    {
+      "epoch": 0.575,
+      "grad_norm": 4.125986576080322,
+      "learning_rate": 9.074666666666668e-06,
+      "loss": 0.2003,
+      "step": 4600
+    },
+    {
+      "epoch": 0.578125,
+      "grad_norm": 4.029356002807617,
+      "learning_rate": 9.008e-06,
+      "loss": 0.2014,
+      "step": 4625
+    },
+    {
+      "epoch": 0.58125,
+      "grad_norm": 4.662783622741699,
+      "learning_rate": 8.941333333333334e-06,
+      "loss": 0.1962,
+      "step": 4650
+    },
+    {
+      "epoch": 0.584375,
+      "grad_norm": 3.978227138519287,
+      "learning_rate": 8.874666666666667e-06,
+      "loss": 0.1693,
+      "step": 4675
+    },
+    {
+      "epoch": 0.5875,
+      "grad_norm": 2.98833966255188,
+      "learning_rate": 8.808000000000001e-06,
+      "loss": 0.1565,
+      "step": 4700
+    },
+    {
+      "epoch": 0.590625,
+      "grad_norm": 4.219015121459961,
+      "learning_rate": 8.741333333333334e-06,
+      "loss": 0.1687,
+      "step": 4725
+    },
+    {
+      "epoch": 0.59375,
+      "grad_norm": 2.8378167152404785,
+      "learning_rate": 8.674666666666668e-06,
+      "loss": 0.1611,
+      "step": 4750
+    },
+    {
+      "epoch": 0.596875,
+      "grad_norm": 2.5076210498809814,
+      "learning_rate": 8.608000000000001e-06,
+      "loss": 0.1186,
+      "step": 4775
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 2.2282755374908447,
+      "learning_rate": 8.541333333333334e-06,
+      "loss": 0.1267,
+      "step": 4800
+    },
+    {
+      "epoch": 0.603125,
+      "grad_norm": 3.080812692642212,
+      "learning_rate": 8.474666666666667e-06,
+      "loss": 0.1112,
+      "step": 4825
+    },
+    {
+      "epoch": 0.60625,
+      "grad_norm": 3.513218641281128,
+      "learning_rate": 8.408e-06,
+      "loss": 0.1326,
+      "step": 4850
+    },
+    {
+      "epoch": 0.609375,
+      "grad_norm": 3.9359219074249268,
+      "learning_rate": 8.341333333333334e-06,
+      "loss": 0.1454,
+      "step": 4875
+    },
+    {
+      "epoch": 0.6125,
+      "grad_norm": 3.585268259048462,
+      "learning_rate": 8.274666666666667e-06,
+      "loss": 0.1583,
+      "step": 4900
+    },
+    {
+      "epoch": 0.615625,
+      "grad_norm": 3.322193145751953,
+      "learning_rate": 8.208e-06,
+      "loss": 0.1417,
+      "step": 4925
+    },
+    {
+      "epoch": 0.61875,
+      "grad_norm": 2.7378623485565186,
+      "learning_rate": 8.141333333333335e-06,
+      "loss": 0.1164,
+      "step": 4950
+    },
+    {
+      "epoch": 0.621875,
+      "grad_norm": 5.096762657165527,
+      "learning_rate": 8.074666666666667e-06,
+      "loss": 0.1077,
+      "step": 4975
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 3.030876636505127,
+      "learning_rate": 8.008e-06,
+      "loss": 0.106,
+      "step": 5000
+    },
+    {
+      "epoch": 0.625,
+      "eval_loss": 0.32894453406333923,
+      "eval_runtime": 75.4732,
+      "eval_samples_per_second": 27.877,
+      "eval_steps_per_second": 1.749,
+      "eval_wer": 15.780125268766945,
+      "step": 5000
+    },
+    {
+      "epoch": 0.628125,
+      "grad_norm": 3.1544792652130127,
+      "learning_rate": 7.941333333333335e-06,
+      "loss": 0.1599,
+      "step": 5025
+    },
+    {
+      "epoch": 0.63125,
+      "grad_norm": 3.2215702533721924,
+      "learning_rate": 7.874666666666668e-06,
+      "loss": 0.1461,
+      "step": 5050
+    },
+    {
+      "epoch": 0.634375,
+      "grad_norm": 3.7183897495269775,
+      "learning_rate": 7.808e-06,
+      "loss": 0.2221,
+      "step": 5075
+    },
+    {
+      "epoch": 0.6375,
+      "grad_norm": 3.5974223613739014,
+      "learning_rate": 7.741333333333333e-06,
+      "loss": 0.2,
+      "step": 5100
+    },
+    {
+      "epoch": 0.640625,
+      "grad_norm": 3.8318378925323486,
+      "learning_rate": 7.674666666666666e-06,
+      "loss": 0.1743,
+      "step": 5125
+    },
+    {
+      "epoch": 0.64375,
+      "grad_norm": 4.3595290184021,
+      "learning_rate": 7.608000000000001e-06,
+      "loss": 0.174,
+      "step": 5150
+    },
+    {
+      "epoch": 0.646875,
+      "grad_norm": 2.820388078689575,
+      "learning_rate": 7.5413333333333335e-06,
+      "loss": 0.1415,
+      "step": 5175
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 2.1610324382781982,
+      "learning_rate": 7.474666666666666e-06,
+      "loss": 0.0955,
+      "step": 5200
+    },
+    {
+      "epoch": 0.653125,
+      "grad_norm": 2.079317092895508,
+      "learning_rate": 7.408000000000001e-06,
+      "loss": 0.1016,
+      "step": 5225
+    },
+    {
+      "epoch": 0.65625,
+      "grad_norm": 2.3071603775024414,
+      "learning_rate": 7.341333333333334e-06,
+      "loss": 0.1042,
+      "step": 5250
+    },
+    {
+      "epoch": 0.659375,
+      "grad_norm": 2.3762526512145996,
+      "learning_rate": 7.2746666666666674e-06,
+      "loss": 0.1058,
+      "step": 5275
+    },
+    {
+      "epoch": 0.6625,
+      "grad_norm": 3.6836395263671875,
+      "learning_rate": 7.208e-06,
+      "loss": 0.1087,
+      "step": 5300
+    },
+    {
+      "epoch": 0.665625,
+      "grad_norm": 3.0931732654571533,
+      "learning_rate": 7.141333333333333e-06,
+      "loss": 0.1062,
+      "step": 5325
+    },
+    {
+      "epoch": 0.66875,
+      "grad_norm": 4.019095420837402,
+      "learning_rate": 7.074666666666668e-06,
+      "loss": 0.1453,
+      "step": 5350
+    },
+    {
+      "epoch": 0.671875,
+      "grad_norm": 3.419175386428833,
+      "learning_rate": 7.0080000000000005e-06,
+      "loss": 0.1721,
+      "step": 5375
+    },
+    {
+      "epoch": 0.675,
+      "grad_norm": 3.387830972671509,
+      "learning_rate": 6.941333333333334e-06,
+      "loss": 0.1652,
+      "step": 5400
+    },
+    {
+      "epoch": 0.678125,
+      "grad_norm": 3.58986234664917,
+      "learning_rate": 6.874666666666667e-06,
+      "loss": 0.1639,
+      "step": 5425
+    },
+    {
+      "epoch": 0.68125,
+      "grad_norm": 4.178884506225586,
+      "learning_rate": 6.808e-06,
+      "loss": 0.1533,
+      "step": 5450
+    },
+    {
+      "epoch": 0.684375,
+      "grad_norm": 3.2337114810943604,
+      "learning_rate": 6.741333333333334e-06,
+      "loss": 0.1237,
+      "step": 5475
+    },
+    {
+      "epoch": 0.6875,
+      "grad_norm": 2.892301321029663,
+      "learning_rate": 6.674666666666667e-06,
+      "loss": 0.1215,
+      "step": 5500
+    },
+    {
+      "epoch": 0.690625,
+      "grad_norm": 4.553407669067383,
+      "learning_rate": 6.608000000000001e-06,
+      "loss": 0.1537,
+      "step": 5525
+    },
+    {
+      "epoch": 0.69375,
+      "grad_norm": 3.8401100635528564,
+      "learning_rate": 6.541333333333334e-06,
+      "loss": 0.1816,
+      "step": 5550
+    },
+    {
+      "epoch": 0.696875,
+      "grad_norm": 2.8084216117858887,
+      "learning_rate": 6.474666666666667e-06,
+      "loss": 0.1381,
+      "step": 5575
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.182170867919922,
+      "learning_rate": 6.408000000000001e-06,
+      "loss": 0.0979,
+      "step": 5600
+    },
+    {
+      "epoch": 0.703125,
+      "grad_norm": 3.2050559520721436,
+      "learning_rate": 6.341333333333334e-06,
+      "loss": 0.0822,
+      "step": 5625
+    },
+    {
+      "epoch": 0.70625,
+      "grad_norm": 2.4150376319885254,
+      "learning_rate": 6.274666666666667e-06,
+      "loss": 0.0815,
+      "step": 5650
+    },
+    {
+      "epoch": 0.709375,
+      "grad_norm": 2.0708541870117188,
+      "learning_rate": 6.2080000000000005e-06,
+      "loss": 0.0944,
+      "step": 5675
+    },
+    {
+      "epoch": 0.7125,
+      "grad_norm": 2.932088851928711,
+      "learning_rate": 6.141333333333333e-06,
+      "loss": 0.1024,
+      "step": 5700
+    },
+    {
+      "epoch": 0.715625,
+      "grad_norm": 2.245450258255005,
+      "learning_rate": 6.074666666666668e-06,
+      "loss": 0.101,
+      "step": 5725
+    },
+    {
+      "epoch": 0.71875,
+      "grad_norm": 2.2716262340545654,
+      "learning_rate": 6.008000000000001e-06,
+      "loss": 0.1,
+      "step": 5750
+    },
+    {
+      "epoch": 0.721875,
+      "grad_norm": 2.496361494064331,
+      "learning_rate": 5.941333333333334e-06,
+      "loss": 0.0966,
+      "step": 5775
+    },
+    {
+      "epoch": 0.725,
+      "grad_norm": 3.3539814949035645,
+      "learning_rate": 5.874666666666667e-06,
+      "loss": 0.0911,
+      "step": 5800
+    },
+    {
+      "epoch": 0.728125,
+      "grad_norm": 2.1496963500976562,
+      "learning_rate": 5.808e-06,
+      "loss": 0.0799,
+      "step": 5825
+    },
+    {
+      "epoch": 0.73125,
+      "grad_norm": 2.5061728954315186,
+      "learning_rate": 5.741333333333335e-06,
+      "loss": 0.0877,
+      "step": 5850
+    },
+    {
+      "epoch": 0.734375,
+      "grad_norm": 2.4256293773651123,
+      "learning_rate": 5.6746666666666675e-06,
+      "loss": 0.0945,
+      "step": 5875
+    },
+    {
+      "epoch": 0.7375,
+      "grad_norm": 3.3995237350463867,
+      "learning_rate": 5.608e-06,
+      "loss": 0.1245,
+      "step": 5900
+    },
+    {
+      "epoch": 0.740625,
+      "grad_norm": 4.556021213531494,
+      "learning_rate": 5.541333333333334e-06,
+      "loss": 0.181,
+      "step": 5925
+    },
+    {
+      "epoch": 0.74375,
+      "grad_norm": 3.8894693851470947,
+      "learning_rate": 5.474666666666667e-06,
+      "loss": 0.1692,
+      "step": 5950
+    },
+    {
+      "epoch": 0.746875,
+      "grad_norm": 3.464264392852783,
+      "learning_rate": 5.408e-06,
+      "loss": 0.1457,
+      "step": 5975
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 3.351585865020752,
+      "learning_rate": 5.341333333333334e-06,
+      "loss": 0.1376,
+      "step": 6000
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 0.3052073121070862,
+      "eval_runtime": 73.9486,
+      "eval_samples_per_second": 28.452,
+      "eval_steps_per_second": 1.785,
+      "eval_wer": 15.02757782555857,
+      "step": 6000
+    },
+    {
+      "epoch": 0.753125,
+      "grad_norm": 3.405545473098755,
+      "learning_rate": 5.274666666666667e-06,
+      "loss": 0.1327,
+      "step": 6025
+    },
+    {
+      "epoch": 0.75625,
+      "grad_norm": 2.5915920734405518,
+      "learning_rate": 5.208000000000001e-06,
+      "loss": 0.1145,
+      "step": 6050
+    },
+    {
+      "epoch": 0.759375,
+      "grad_norm": 2.2758867740631104,
+      "learning_rate": 5.141333333333334e-06,
+      "loss": 0.1053,
+      "step": 6075
+    },
+    {
+      "epoch": 0.7625,
+      "grad_norm": 2.8261423110961914,
+      "learning_rate": 5.0746666666666665e-06,
+      "loss": 0.0942,
+      "step": 6100
+    },
+    {
+      "epoch": 0.765625,
+      "grad_norm": 3.362257480621338,
+      "learning_rate": 5.008000000000001e-06,
+      "loss": 0.0969,
+      "step": 6125
+    },
+    {
+      "epoch": 0.76875,
+      "grad_norm": 3.890949249267578,
+      "learning_rate": 4.941333333333334e-06,
+      "loss": 0.1068,
+      "step": 6150
+    },
+    {
+      "epoch": 0.771875,
+      "grad_norm": 3.03787899017334,
+      "learning_rate": 4.874666666666667e-06,
+      "loss": 0.1094,
+      "step": 6175
+    },
+    {
+      "epoch": 0.775,
+      "grad_norm": 2.8833038806915283,
+      "learning_rate": 4.808e-06,
+      "loss": 0.1043,
+      "step": 6200
+    },
+    {
+      "epoch": 0.778125,
+      "grad_norm": 3.1083550453186035,
+      "learning_rate": 4.741333333333334e-06,
+      "loss": 0.1061,
+      "step": 6225
+    },
+    {
+      "epoch": 0.78125,
+      "grad_norm": 3.4954771995544434,
+      "learning_rate": 4.674666666666667e-06,
+      "loss": 0.1013,
+      "step": 6250
+    },
+    {
+      "epoch": 0.784375,
+      "grad_norm": 3.035095691680908,
+      "learning_rate": 4.608000000000001e-06,
+      "loss": 0.1119,
+      "step": 6275
+    },
+    {
+      "epoch": 0.7875,
+      "grad_norm": 3.62898850440979,
+      "learning_rate": 4.5413333333333334e-06,
+      "loss": 0.1253,
+      "step": 6300
+    },
+    {
+      "epoch": 0.790625,
+      "grad_norm": 2.595010280609131,
+      "learning_rate": 4.474666666666667e-06,
+      "loss": 0.1681,
+      "step": 6325
+    },
+    {
+      "epoch": 0.79375,
+      "grad_norm": 3.7245900630950928,
+      "learning_rate": 4.408000000000001e-06,
+      "loss": 0.1501,
+      "step": 6350
+    },
+    {
+      "epoch": 0.796875,
+      "grad_norm": 2.7315571308135986,
+      "learning_rate": 4.344e-06,
+      "loss": 0.1395,
+      "step": 6375
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.3649332523345947,
+      "learning_rate": 4.277333333333334e-06,
+      "loss": 0.1111,
+      "step": 6400
+    },
+    {
+      "epoch": 0.803125,
+      "grad_norm": 2.491359233856201,
+      "learning_rate": 4.210666666666667e-06,
+      "loss": 0.1024,
+      "step": 6425
+    },
+    {
+      "epoch": 0.80625,
+      "grad_norm": 2.5985302925109863,
+      "learning_rate": 4.1440000000000005e-06,
+      "loss": 0.0925,
+      "step": 6450
+    },
+    {
+      "epoch": 0.809375,
+      "grad_norm": 4.008167266845703,
+      "learning_rate": 4.077333333333333e-06,
+      "loss": 0.1385,
+      "step": 6475
+    },
+    {
+      "epoch": 0.8125,
+      "grad_norm": 2.743041753768921,
+      "learning_rate": 4.010666666666667e-06,
+      "loss": 0.1289,
+      "step": 6500
+    },
+    {
+      "epoch": 0.815625,
+      "grad_norm": 4.4984893798828125,
+      "learning_rate": 3.944e-06,
+      "loss": 0.1709,
+      "step": 6525
+    },
+    {
+      "epoch": 0.81875,
+      "grad_norm": 3.432147741317749,
+      "learning_rate": 3.8773333333333335e-06,
+      "loss": 0.1563,
+      "step": 6550
+    },
+    {
+      "epoch": 0.821875,
+      "grad_norm": 3.6097943782806396,
+      "learning_rate": 3.810666666666667e-06,
+      "loss": 0.159,
+      "step": 6575
+    },
+    {
+      "epoch": 0.825,
+      "grad_norm": 3.096435308456421,
+      "learning_rate": 3.7440000000000005e-06,
+      "loss": 0.1444,
+      "step": 6600
+    },
+    {
+      "epoch": 0.828125,
+      "grad_norm": 3.5198802947998047,
+      "learning_rate": 3.6773333333333338e-06,
+      "loss": 0.1493,
+      "step": 6625
+    },
+    {
+      "epoch": 0.83125,
+      "grad_norm": 3.3834660053253174,
+      "learning_rate": 3.6106666666666666e-06,
+      "loss": 0.1737,
+      "step": 6650
+    },
+    {
+      "epoch": 0.834375,
+      "grad_norm": 3.2613072395324707,
+      "learning_rate": 3.5440000000000003e-06,
+      "loss": 0.1408,
+      "step": 6675
+    },
+    {
+      "epoch": 0.8375,
+      "grad_norm": 2.618708848953247,
+      "learning_rate": 3.4773333333333336e-06,
+      "loss": 0.1427,
+      "step": 6700
+    },
+    {
+      "epoch": 0.840625,
+      "grad_norm": 2.236100196838379,
+      "learning_rate": 3.4106666666666672e-06,
+      "loss": 0.0976,
+      "step": 6725
+    },
+    {
+      "epoch": 0.84375,
+      "grad_norm": 2.4650626182556152,
+      "learning_rate": 3.344e-06,
+      "loss": 0.0899,
+      "step": 6750
+    },
+    {
+      "epoch": 0.846875,
+      "grad_norm": 3.514897346496582,
+      "learning_rate": 3.2773333333333334e-06,
+      "loss": 0.1099,
+      "step": 6775
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 3.703801155090332,
+      "learning_rate": 3.210666666666667e-06,
+      "loss": 0.1979,
+      "step": 6800
+    },
+    {
+      "epoch": 0.853125,
+      "grad_norm": 4.976568698883057,
+      "learning_rate": 3.1440000000000003e-06,
+      "loss": 0.1801,
+      "step": 6825
+    },
+    {
+      "epoch": 0.85625,
+      "grad_norm": 4.201725959777832,
+      "learning_rate": 3.077333333333334e-06,
+      "loss": 0.1839,
+      "step": 6850
+    },
+    {
+      "epoch": 0.859375,
+      "grad_norm": 3.662229061126709,
+      "learning_rate": 3.010666666666667e-06,
+      "loss": 0.1695,
+      "step": 6875
+    },
+    {
+      "epoch": 0.8625,
+      "grad_norm": 3.8069918155670166,
+      "learning_rate": 2.944e-06,
+      "loss": 0.1615,
+      "step": 6900
+    },
+    {
+      "epoch": 0.865625,
+      "grad_norm": 3.208935499191284,
+      "learning_rate": 2.877333333333334e-06,
+      "loss": 0.1496,
+      "step": 6925
+    },
+    {
+      "epoch": 0.86875,
+      "grad_norm": 3.4923043251037598,
+      "learning_rate": 2.810666666666667e-06,
+      "loss": 0.147,
+      "step": 6950
+    },
+    {
+      "epoch": 0.871875,
+      "grad_norm": 4.01771354675293,
+      "learning_rate": 2.744e-06,
+      "loss": 0.1751,
+      "step": 6975
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 4.1294355392456055,
+      "learning_rate": 2.6773333333333336e-06,
+      "loss": 0.1733,
+      "step": 7000
+    },
+    {
+      "epoch": 0.875,
+      "eval_loss": 0.3004015386104584,
+      "eval_runtime": 74.1497,
+      "eval_samples_per_second": 28.375,
+      "eval_steps_per_second": 1.78,
+      "eval_wer": 13.933813218659438,
+      "step": 7000
+    },
+    {
+      "epoch": 0.878125,
+      "grad_norm": 12.625326156616211,
+      "learning_rate": 2.616e-06,
+      "loss": 0.3186,
+      "step": 7025
+    },
+    {
+      "epoch": 0.88125,
+      "grad_norm": 7.057121753692627,
+      "learning_rate": 2.5493333333333337e-06,
+      "loss": 0.5086,
+      "step": 7050
+    },
+    {
+      "epoch": 0.884375,
+      "grad_norm": 5.440456390380859,
+      "learning_rate": 2.482666666666667e-06,
+      "loss": 0.481,
+      "step": 7075
+    },
+    {
+      "epoch": 0.8875,
+      "grad_norm": 6.303742408752441,
+      "learning_rate": 2.4160000000000002e-06,
+      "loss": 0.4034,
+      "step": 7100
+    },
+    {
+      "epoch": 0.890625,
+      "grad_norm": 3.7720141410827637,
+      "learning_rate": 2.3493333333333335e-06,
+      "loss": 0.2274,
+      "step": 7125
+    },
+    {
+      "epoch": 0.89375,
+      "grad_norm": 4.611368656158447,
+      "learning_rate": 2.2826666666666668e-06,
+      "loss": 0.1717,
+      "step": 7150
+    },
+    {
+      "epoch": 0.896875,
+      "grad_norm": 3.155137777328491,
+      "learning_rate": 2.216e-06,
+      "loss": 0.1594,
+      "step": 7175
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 3.6036856174468994,
+      "learning_rate": 2.1493333333333337e-06,
+      "loss": 0.1264,
+      "step": 7200
+    },
+    {
+      "epoch": 0.903125,
+      "grad_norm": 3.040969133377075,
+      "learning_rate": 2.0826666666666666e-06,
+      "loss": 0.1198,
+      "step": 7225
+    },
+    {
+      "epoch": 0.90625,
+      "grad_norm": 2.538546562194824,
+      "learning_rate": 2.0160000000000003e-06,
+      "loss": 0.1233,
+      "step": 7250
+    },
+    {
+      "epoch": 0.909375,
+      "grad_norm": 2.2235965728759766,
+      "learning_rate": 1.9493333333333335e-06,
+      "loss": 0.0948,
+      "step": 7275
+    },
+    {
+      "epoch": 0.9125,
+      "grad_norm": 2.112567663192749,
+      "learning_rate": 1.8826666666666668e-06,
+      "loss": 0.0796,
+      "step": 7300
+    },
+    {
+      "epoch": 0.915625,
+      "grad_norm": 2.5596227645874023,
+      "learning_rate": 1.8160000000000003e-06,
+      "loss": 0.0871,
+      "step": 7325
+    },
+    {
+      "epoch": 0.91875,
+      "grad_norm": 3.282794713973999,
+      "learning_rate": 1.7493333333333335e-06,
+      "loss": 0.1154,
+      "step": 7350
+    },
+    {
+      "epoch": 0.921875,
+      "grad_norm": 3.568565607070923,
+      "learning_rate": 1.6826666666666668e-06,
+      "loss": 0.1648,
+      "step": 7375
+    },
+    {
+      "epoch": 0.925,
+      "grad_norm": 3.731203079223633,
+      "learning_rate": 1.616e-06,
+      "loss": 0.132,
+      "step": 7400
+    },
+    {
+      "epoch": 0.928125,
+      "grad_norm": 2.649831771850586,
+      "learning_rate": 1.5493333333333335e-06,
+      "loss": 0.1518,
+      "step": 7425
+    },
+    {
+      "epoch": 0.93125,
+      "grad_norm": 2.2203938961029053,
+      "learning_rate": 1.4826666666666666e-06,
+      "loss": 0.1045,
+      "step": 7450
+    },
+    {
+      "epoch": 0.934375,
+      "grad_norm": 3.9395177364349365,
+      "learning_rate": 1.416e-06,
+      "loss": 0.0957,
+      "step": 7475
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 3.2605130672454834,
+      "learning_rate": 1.3493333333333333e-06,
+      "loss": 0.0901,
+      "step": 7500
+    },
+    {
+      "epoch": 0.940625,
+      "grad_norm": 4.289961814880371,
+      "learning_rate": 1.2826666666666668e-06,
+      "loss": 0.1846,
+      "step": 7525
+    },
+    {
+      "epoch": 0.94375,
+      "grad_norm": 4.553671836853027,
+      "learning_rate": 1.216e-06,
+      "loss": 0.2492,
+      "step": 7550
+    },
+    {
+      "epoch": 0.946875,
+      "grad_norm": 5.279869079589844,
+      "learning_rate": 1.1493333333333334e-06,
+      "loss": 0.2844,
+      "step": 7575
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 2.1614372730255127,
+      "learning_rate": 1.0826666666666668e-06,
+      "loss": 0.1715,
+      "step": 7600
+    },
+    {
+      "epoch": 0.953125,
+      "grad_norm": 2.8423452377319336,
+      "learning_rate": 1.016e-06,
+      "loss": 0.1162,
+      "step": 7625
+    },
+    {
+      "epoch": 0.95625,
+      "grad_norm": 2.077169895172119,
+      "learning_rate": 9.493333333333334e-07,
+      "loss": 0.094,
+      "step": 7650
+    },
+    {
+      "epoch": 0.959375,
+      "grad_norm": 3.680450201034546,
+      "learning_rate": 8.826666666666666e-07,
+      "loss": 0.1291,
+      "step": 7675
+    },
+    {
+      "epoch": 0.9625,
+      "grad_norm": 3.4633026123046875,
+      "learning_rate": 8.160000000000001e-07,
+      "loss": 0.1549,
+      "step": 7700
+    },
+    {
+      "epoch": 0.965625,
+      "grad_norm": 3.8427698612213135,
+      "learning_rate": 7.493333333333335e-07,
+      "loss": 0.1646,
+      "step": 7725
+    },
+    {
+      "epoch": 0.96875,
+      "grad_norm": 2.8945538997650146,
+      "learning_rate": 6.826666666666667e-07,
+      "loss": 0.1524,
+      "step": 7750
+    },
+    {
+      "epoch": 0.971875,
+      "grad_norm": 3.424391269683838,
+      "learning_rate": 6.160000000000001e-07,
+      "loss": 0.1083,
+      "step": 7775
+    },
+    {
+      "epoch": 0.975,
+      "grad_norm": 5.2906951904296875,
+      "learning_rate": 5.493333333333334e-07,
+      "loss": 0.1327,
+      "step": 7800
+    },
+    {
+      "epoch": 0.978125,
+      "grad_norm": 6.5452046394348145,
+      "learning_rate": 4.853333333333333e-07,
+      "loss": 0.3624,
+      "step": 7825
+    },
+    {
+      "epoch": 0.98125,
+      "grad_norm": 2.549628496170044,
+      "learning_rate": 4.186666666666667e-07,
+      "loss": 0.1518,
+      "step": 7850
+    },
+    {
+      "epoch": 0.984375,
+      "grad_norm": 2.076683759689331,
+      "learning_rate": 3.5200000000000003e-07,
+      "loss": 0.1048,
+      "step": 7875
+    },
+    {
+      "epoch": 0.9875,
+      "grad_norm": 2.8122971057891846,
+      "learning_rate": 2.8533333333333335e-07,
+      "loss": 0.0981,
+      "step": 7900
+    },
+    {
+      "epoch": 0.990625,
+      "grad_norm": 2.3224234580993652,
+      "learning_rate": 2.186666666666667e-07,
+      "loss": 0.1017,
+      "step": 7925
+    },
+    {
+      "epoch": 0.99375,
+      "grad_norm": 2.10176682472229,
+      "learning_rate": 1.52e-07,
+      "loss": 0.0937,
+      "step": 7950
+    },
+    {
+      "epoch": 0.996875,
+      "grad_norm": 3.3718252182006836,
+      "learning_rate": 8.533333333333334e-08,
+      "loss": 0.1,
+      "step": 7975
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 4.459543704986572,
+      "learning_rate": 1.866666666666667e-08,
+      "loss": 0.1228,
+      "step": 8000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.24521400034427643,
+      "eval_runtime": 75.7252,
+      "eval_samples_per_second": 27.785,
+      "eval_steps_per_second": 1.743,
+      "eval_wer": 13.816958025614658,
+      "step": 8000
+    },
+    {
+      "epoch": 1.0,
+      "step": 8000,
+      "total_flos": 1.660415901696e+19,
+      "train_loss": 0.22206098145246506,
+      "train_runtime": 4270.5513,
+      "train_samples_per_second": 59.945,
+      "train_steps_per_second": 1.873
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 8000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9223372036854775807,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.660415901696e+19,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

wandb/run-20250214_113805-769lwzm2/files/output.log CHANGED Viewed

@@ -1553,3 +1553,161 @@ Training completed. Do not forget to share your model on huggingface.co/models =
 [INFO|feature_extraction_utils.py:437] 2025-02-14 12:49:32,767 >> Feature extractor saved in ./preprocessor_config.json
 [INFO|modelcard.py:449] 2025-02-14 12:49:32,953 >> Dropping the following result as it does not have all the necessary fields:
 {'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 13.816958025614658}]}

 [INFO|feature_extraction_utils.py:437] 2025-02-14 12:49:32,767 >> Feature extractor saved in ./preprocessor_config.json
 [INFO|modelcard.py:449] 2025-02-14 12:49:32,953 >> Dropping the following result as it does not have all the necessary fields:
 {'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 13.816958025614658}]}
+***** train metrics *****
+  epoch                    =           1.0
+  total_flos               = 15463828125GF
+  train_loss               =        0.2221
+  train_runtime            =    1:11:10.55
+  train_samples_per_second =        59.945
+  train_steps_per_second   =         1.873
+02/14/2025 12:49:36 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:4176] 2025-02-14 12:49:36,135 >>
+***** Running Evaluation *****
+[INFO|trainer.py:4180] 2025-02-14 12:49:36,135 >>   Num examples: Unknown
+[INFO|trainer.py:4181] 2025-02-14 12:49:36,360 >>   Batch size = 16
+[INFO|trainer_utils.py:837] 2025-02-14 12:49:43,950 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:44,088 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:44,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:45,564 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:46,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:46,754 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:47,308 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:47,931 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:48,501 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:49,141 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:49,695 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:50,267 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:50,914 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:51,407 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:51,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:52,450 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:52,868 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:53,315 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:53,848 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:54,262 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:54,738 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:55,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:55,706 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:56,132 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:56,581 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:56,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:57,385 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:57,843 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:58,338 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:58,713 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:59,132 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:49:59,574 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:00,003 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:00,467 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:00,862 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:01,326 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:01,773 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:02,167 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:02,612 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:03,040 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:03,649 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:04,062 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:04,494 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:04,870 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:05,277 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:05,674 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:06,139 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:06,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:06,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:07,474 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:07,892 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:08,329 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:08,757 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:09,132 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:09,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:09,970 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:10,439 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:10,807 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:11,197 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:11,646 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:12,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:12,475 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:12,906 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:13,352 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:13,772 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:14,141 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:14,494 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:14,909 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:15,326 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:15,743 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:16,168 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:16,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:17,038 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:17,466 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:17,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:18,374 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:18,821 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:19,197 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:19,609 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:20,038 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:20,447 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:20,846 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:21,237 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:21,657 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:22,106 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:22,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:22,975 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:23,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:23,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:24,343 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:24,850 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:25,273 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:25,693 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:26,113 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:26,516 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:26,998 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:27,442 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:27,869 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:28,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:28,718 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:29,158 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:29,597 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:30,052 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:30,469 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:30,922 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:31,405 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:31,834 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:32,271 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:32,744 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:33,154 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:33,548 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:33,986 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:34,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:34,820 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:35,204 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:35,605 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:36,003 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:36,456 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:36,919 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:37,326 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:37,739 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:38,165 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:38,544 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:38,977 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:39,392 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:39,796 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:40,194 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:40,597 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:41,016 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:41,415 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:41,838 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:42,209 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-14 12:50:42,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+***** eval metrics *****
+  epoch                   =        1.0
+  eval_loss               =     0.2452
+  eval_runtime            = 0:01:14.51
+  eval_samples_per_second =     28.236
+  eval_steps_per_second   =      1.771
+  eval_wer                =     13.817
+[INFO|trainer.py:3860] 2025-02-14 12:50:50,651 >> Saving model checkpoint to ./
+[INFO|configuration_utils.py:423] 2025-02-14 12:50:50,652 >> Configuration saved in ./config.json
+[INFO|configuration_utils.py:906] 2025-02-14 12:50:50,653 >> Configuration saved in ./generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-14 12:50:51,227 >> Model weights saved in ./model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-14 12:50:51,228 >> Feature extractor saved in ./preprocessor_config.json
+run-769lwzm2.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.10M/4.10M [00:00<00:00, 5.43MB/s]

wandb/run-20250214_113805-769lwzm2/run-769lwzm2.wandb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5bd2b1631e813875cd713dcf98c810f3f515b442c8e5d11ba595cd916d228576
-size 4063232

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d98c19965714e6634732bf8bd8b654d2f947be412067708c8249f41aaa7c73d
+size 4096000