{
"best_metric": 81.65732357316676,
"best_model_checkpoint": "/scratch/p310333/whisper-small-dialect_all_seed84/checkpoint-1750",
"epoch": 0.3017137340091721,
"eval_steps": 250,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003017137340091721,
"grad_norm": 65.54208374023438,
"learning_rate": 5.000000000000001e-07,
"loss": 4.9187,
"step": 25
},
{
"epoch": 0.006034274680183442,
"grad_norm": 30.615581512451172,
"learning_rate": 1.0000000000000002e-06,
"loss": 4.0023,
"step": 50
},
{
"epoch": 0.009051412020275163,
"grad_norm": 29.282546997070312,
"learning_rate": 1.5e-06,
"loss": 3.4756,
"step": 75
},
{
"epoch": 0.012068549360366883,
"grad_norm": 28.80223846435547,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.7255,
"step": 100
},
{
"epoch": 0.015085686700458605,
"grad_norm": 32.79741287231445,
"learning_rate": 2.5e-06,
"loss": 2.4196,
"step": 125
},
{
"epoch": 0.018102824040550327,
"grad_norm": 23.08782196044922,
"learning_rate": 3e-06,
"loss": 2.2985,
"step": 150
},
{
"epoch": 0.021119961380642045,
"grad_norm": 26.61124610900879,
"learning_rate": 3.5e-06,
"loss": 2.3417,
"step": 175
},
{
"epoch": 0.024137098720733767,
"grad_norm": 19.468734741210938,
"learning_rate": 4.000000000000001e-06,
"loss": 2.1087,
"step": 200
},
{
"epoch": 0.02715423606082549,
"grad_norm": 33.279293060302734,
"learning_rate": 4.5e-06,
"loss": 2.0878,
"step": 225
},
{
"epoch": 0.03017137340091721,
"grad_norm": 27.024686813354492,
"learning_rate": 5e-06,
"loss": 2.1126,
"step": 250
},
{
"epoch": 0.03017137340091721,
"eval_cer": 75.39939054285433,
"eval_loss": 1.9080588817596436,
"eval_runtime": 4890.5649,
"eval_samples_per_second": 3.389,
"eval_steps_per_second": 0.424,
"eval_wer": 89.64246657668397,
"step": 250
},
{
"epoch": 0.03318851074100893,
"grad_norm": 24.560420989990234,
"learning_rate": 5.500000000000001e-06,
"loss": 1.8494,
"step": 275
},
{
"epoch": 0.036205648081100654,
"grad_norm": 24.324853897094727,
"learning_rate": 6e-06,
"loss": 1.8878,
"step": 300
},
{
"epoch": 0.039222785421192376,
"grad_norm": 28.385276794433594,
"learning_rate": 6.5000000000000004e-06,
"loss": 1.7333,
"step": 325
},
{
"epoch": 0.04223992276128409,
"grad_norm": 27.999126434326172,
"learning_rate": 7e-06,
"loss": 1.6565,
"step": 350
},
{
"epoch": 0.04525706010137581,
"grad_norm": 29.16621208190918,
"learning_rate": 7.500000000000001e-06,
"loss": 1.4705,
"step": 375
},
{
"epoch": 0.048274197441467534,
"grad_norm": 21.424270629882812,
"learning_rate": 8.000000000000001e-06,
"loss": 1.3766,
"step": 400
},
{
"epoch": 0.051291334781559256,
"grad_norm": 25.499229431152344,
"learning_rate": 8.5e-06,
"loss": 1.3916,
"step": 425
},
{
"epoch": 0.05430847212165098,
"grad_norm": 30.007627487182617,
"learning_rate": 9e-06,
"loss": 1.4213,
"step": 450
},
{
"epoch": 0.0573256094617427,
"grad_norm": 30.537519454956055,
"learning_rate": 9.5e-06,
"loss": 1.417,
"step": 475
},
{
"epoch": 0.06034274680183442,
"grad_norm": 26.885221481323242,
"learning_rate": 1e-05,
"loss": 1.4094,
"step": 500
},
{
"epoch": 0.06034274680183442,
"eval_cer": 63.82253332025917,
"eval_loss": 1.4668316841125488,
"eval_runtime": 4839.8999,
"eval_samples_per_second": 3.424,
"eval_steps_per_second": 0.428,
"eval_wer": 91.40993684791283,
"step": 500
},
{
"epoch": 0.06335988414192614,
"grad_norm": 28.894699096679688,
"learning_rate": 9.944444444444445e-06,
"loss": 1.5314,
"step": 525
},
{
"epoch": 0.06637702148201786,
"grad_norm": 24.584243774414062,
"learning_rate": 9.88888888888889e-06,
"loss": 1.3876,
"step": 550
},
{
"epoch": 0.06939415882210959,
"grad_norm": 23.92827033996582,
"learning_rate": 9.833333333333333e-06,
"loss": 1.3787,
"step": 575
},
{
"epoch": 0.07241129616220131,
"grad_norm": 22.209672927856445,
"learning_rate": 9.777777777777779e-06,
"loss": 1.5477,
"step": 600
},
{
"epoch": 0.07542843350229303,
"grad_norm": 24.50571632385254,
"learning_rate": 9.722222222222223e-06,
"loss": 1.3358,
"step": 625
},
{
"epoch": 0.07844557084238475,
"grad_norm": 25.96898078918457,
"learning_rate": 9.666666666666667e-06,
"loss": 1.4117,
"step": 650
},
{
"epoch": 0.08146270818247647,
"grad_norm": 24.520370483398438,
"learning_rate": 9.611111111111112e-06,
"loss": 1.4163,
"step": 675
},
{
"epoch": 0.08447984552256818,
"grad_norm": 28.337772369384766,
"learning_rate": 9.555555555555556e-06,
"loss": 1.4381,
"step": 700
},
{
"epoch": 0.0874969828626599,
"grad_norm": 22.536033630371094,
"learning_rate": 9.5e-06,
"loss": 1.4022,
"step": 725
},
{
"epoch": 0.09051412020275162,
"grad_norm": 23.963077545166016,
"learning_rate": 9.444444444444445e-06,
"loss": 1.4845,
"step": 750
},
{
"epoch": 0.09051412020275162,
"eval_cer": 61.253460253286775,
"eval_loss": 1.4192742109298706,
"eval_runtime": 4048.8139,
"eval_samples_per_second": 4.093,
"eval_steps_per_second": 0.512,
"eval_wer": 89.38631146491167,
"step": 750
},
{
"epoch": 0.09353125754284335,
"grad_norm": 21.442434310913086,
"learning_rate": 9.38888888888889e-06,
"loss": 1.3301,
"step": 775
},
{
"epoch": 0.09654839488293507,
"grad_norm": 18.17827606201172,
"learning_rate": 9.333333333333334e-06,
"loss": 1.3823,
"step": 800
},
{
"epoch": 0.09956553222302679,
"grad_norm": 23.526996612548828,
"learning_rate": 9.277777777777778e-06,
"loss": 1.5849,
"step": 825
},
{
"epoch": 0.10258266956311851,
"grad_norm": 21.815263748168945,
"learning_rate": 9.222222222222224e-06,
"loss": 1.3643,
"step": 850
},
{
"epoch": 0.10559980690321023,
"grad_norm": 21.027591705322266,
"learning_rate": 9.166666666666666e-06,
"loss": 1.3723,
"step": 875
},
{
"epoch": 0.10861694424330195,
"grad_norm": 26.622665405273438,
"learning_rate": 9.111111111111112e-06,
"loss": 1.4118,
"step": 900
},
{
"epoch": 0.11163408158339368,
"grad_norm": 25.46664047241211,
"learning_rate": 9.055555555555556e-06,
"loss": 1.3537,
"step": 925
},
{
"epoch": 0.1146512189234854,
"grad_norm": 21.33067512512207,
"learning_rate": 9e-06,
"loss": 1.3697,
"step": 950
},
{
"epoch": 0.11766835626357712,
"grad_norm": 27.155698776245117,
"learning_rate": 8.944444444444446e-06,
"loss": 1.4537,
"step": 975
},
{
"epoch": 0.12068549360366884,
"grad_norm": 27.02322769165039,
"learning_rate": 8.888888888888888e-06,
"loss": 1.4639,
"step": 1000
},
{
"epoch": 0.12068549360366884,
"eval_cer": 62.40448946649872,
"eval_loss": 1.3861624002456665,
"eval_runtime": 6536.9994,
"eval_samples_per_second": 2.535,
"eval_steps_per_second": 0.317,
"eval_wer": 91.8197850267485,
"step": 1000
},
{
"epoch": 0.12370263094376056,
"grad_norm": 24.13388442993164,
"learning_rate": 8.833333333333334e-06,
"loss": 1.4738,
"step": 1025
},
{
"epoch": 0.12671976828385229,
"grad_norm": 26.072269439697266,
"learning_rate": 8.777777777777778e-06,
"loss": 1.4402,
"step": 1050
},
{
"epoch": 0.129736905623944,
"grad_norm": 31.587852478027344,
"learning_rate": 8.722222222222224e-06,
"loss": 1.3485,
"step": 1075
},
{
"epoch": 0.13275404296403573,
"grad_norm": 23.130081176757812,
"learning_rate": 8.666666666666668e-06,
"loss": 1.3384,
"step": 1100
},
{
"epoch": 0.13577118030412744,
"grad_norm": 27.463407516479492,
"learning_rate": 8.611111111111112e-06,
"loss": 1.4537,
"step": 1125
},
{
"epoch": 0.13878831764421917,
"grad_norm": 20.881338119506836,
"learning_rate": 8.555555555555556e-06,
"loss": 1.3062,
"step": 1150
},
{
"epoch": 0.14180545498431088,
"grad_norm": 26.432994842529297,
"learning_rate": 8.5e-06,
"loss": 1.3982,
"step": 1175
},
{
"epoch": 0.14482259232440262,
"grad_norm": 18.55461311340332,
"learning_rate": 8.444444444444446e-06,
"loss": 1.2873,
"step": 1200
},
{
"epoch": 0.14783972966449432,
"grad_norm": 23.862037658691406,
"learning_rate": 8.38888888888889e-06,
"loss": 1.3043,
"step": 1225
},
{
"epoch": 0.15085686700458606,
"grad_norm": 27.1133975982666,
"learning_rate": 8.333333333333334e-06,
"loss": 1.3855,
"step": 1250
},
{
"epoch": 0.15085686700458606,
"eval_cer": 66.67634657595208,
"eval_loss": 1.367380976676941,
"eval_runtime": 5424.8274,
"eval_samples_per_second": 3.055,
"eval_steps_per_second": 0.382,
"eval_wer": 93.20499305425562,
"step": 1250
},
{
"epoch": 0.15387400434467777,
"grad_norm": 24.38422393798828,
"learning_rate": 8.277777777777778e-06,
"loss": 1.4679,
"step": 1275
},
{
"epoch": 0.1568911416847695,
"grad_norm": 23.12870216369629,
"learning_rate": 8.222222222222222e-06,
"loss": 1.4213,
"step": 1300
},
{
"epoch": 0.1599082790248612,
"grad_norm": 24.16248321533203,
"learning_rate": 8.166666666666668e-06,
"loss": 1.2921,
"step": 1325
},
{
"epoch": 0.16292541636495295,
"grad_norm": 22.89928436279297,
"learning_rate": 8.111111111111112e-06,
"loss": 1.3745,
"step": 1350
},
{
"epoch": 0.16594255370504465,
"grad_norm": 26.103015899658203,
"learning_rate": 8.055555555555557e-06,
"loss": 1.3796,
"step": 1375
},
{
"epoch": 0.16895969104513636,
"grad_norm": 17.778417587280273,
"learning_rate": 8.000000000000001e-06,
"loss": 1.3638,
"step": 1400
},
{
"epoch": 0.1719768283852281,
"grad_norm": 23.057931900024414,
"learning_rate": 7.944444444444445e-06,
"loss": 1.3524,
"step": 1425
},
{
"epoch": 0.1749939657253198,
"grad_norm": 20.255752563476562,
"learning_rate": 7.88888888888889e-06,
"loss": 1.3768,
"step": 1450
},
{
"epoch": 0.17801110306541154,
"grad_norm": 19.22992706298828,
"learning_rate": 7.833333333333333e-06,
"loss": 1.247,
"step": 1475
},
{
"epoch": 0.18102824040550325,
"grad_norm": 23.74711036682129,
"learning_rate": 7.77777777777778e-06,
"loss": 1.3741,
"step": 1500
},
{
"epoch": 0.18102824040550325,
"eval_cer": 58.32365342404792,
"eval_loss": 1.3499553203582764,
"eval_runtime": 4123.7168,
"eval_samples_per_second": 4.019,
"eval_steps_per_second": 0.502,
"eval_wer": 83.93414843203514,
"step": 1500
},
{
"epoch": 0.18404537774559498,
"grad_norm": 22.509279251098633,
"learning_rate": 7.722222222222223e-06,
"loss": 1.2904,
"step": 1525
},
{
"epoch": 0.1870625150856867,
"grad_norm": 22.517881393432617,
"learning_rate": 7.666666666666667e-06,
"loss": 1.3,
"step": 1550
},
{
"epoch": 0.19007965242577843,
"grad_norm": 26.30403709411621,
"learning_rate": 7.611111111111111e-06,
"loss": 1.3627,
"step": 1575
},
{
"epoch": 0.19309678976587014,
"grad_norm": 20.28595542907715,
"learning_rate": 7.555555555555556e-06,
"loss": 1.2716,
"step": 1600
},
{
"epoch": 0.19611392710596187,
"grad_norm": 22.23461151123047,
"learning_rate": 7.500000000000001e-06,
"loss": 1.4848,
"step": 1625
},
{
"epoch": 0.19913106444605358,
"grad_norm": 23.31128692626953,
"learning_rate": 7.444444444444445e-06,
"loss": 1.2453,
"step": 1650
},
{
"epoch": 0.20214820178614532,
"grad_norm": 23.57061004638672,
"learning_rate": 7.38888888888889e-06,
"loss": 1.3575,
"step": 1675
},
{
"epoch": 0.20516533912623702,
"grad_norm": 19.259546279907227,
"learning_rate": 7.333333333333333e-06,
"loss": 1.2894,
"step": 1700
},
{
"epoch": 0.20818247646632876,
"grad_norm": 25.171783447265625,
"learning_rate": 7.277777777777778e-06,
"loss": 1.3894,
"step": 1725
},
{
"epoch": 0.21119961380642047,
"grad_norm": 21.088159561157227,
"learning_rate": 7.222222222222223e-06,
"loss": 1.3073,
"step": 1750
},
{
"epoch": 0.21119961380642047,
"eval_cer": 58.877671089253795,
"eval_loss": 1.326649785041809,
"eval_runtime": 4478.8906,
"eval_samples_per_second": 3.7,
"eval_steps_per_second": 0.463,
"eval_wer": 81.65732357316676,
"step": 1750
},
{
"epoch": 0.2142167511465122,
"grad_norm": 23.551097869873047,
"learning_rate": 7.166666666666667e-06,
"loss": 1.3203,
"step": 1775
},
{
"epoch": 0.2172338884866039,
"grad_norm": 25.782739639282227,
"learning_rate": 7.111111111111112e-06,
"loss": 1.2752,
"step": 1800
},
{
"epoch": 0.22025102582669562,
"grad_norm": 16.879140853881836,
"learning_rate": 7.055555555555557e-06,
"loss": 1.2145,
"step": 1825
},
{
"epoch": 0.22326816316678735,
"grad_norm": 22.139205932617188,
"learning_rate": 7e-06,
"loss": 1.2678,
"step": 1850
},
{
"epoch": 0.22628530050687906,
"grad_norm": 15.391493797302246,
"learning_rate": 6.944444444444445e-06,
"loss": 1.2468,
"step": 1875
},
{
"epoch": 0.2293024378469708,
"grad_norm": 20.986146926879883,
"learning_rate": 6.88888888888889e-06,
"loss": 1.2093,
"step": 1900
},
{
"epoch": 0.2323195751870625,
"grad_norm": 27.949708938598633,
"learning_rate": 6.833333333333334e-06,
"loss": 1.2795,
"step": 1925
},
{
"epoch": 0.23533671252715424,
"grad_norm": 21.94474220275879,
"learning_rate": 6.777777777777779e-06,
"loss": 1.2931,
"step": 1950
},
{
"epoch": 0.23835384986724595,
"grad_norm": 22.343372344970703,
"learning_rate": 6.7222222222222235e-06,
"loss": 1.2731,
"step": 1975
},
{
"epoch": 0.24137098720733768,
"grad_norm": 21.14777374267578,
"learning_rate": 6.666666666666667e-06,
"loss": 1.2993,
"step": 2000
},
{
"epoch": 0.24137098720733768,
"eval_cer": 59.81580515474027,
"eval_loss": 1.3122555017471313,
"eval_runtime": 5034.5037,
"eval_samples_per_second": 3.292,
"eval_steps_per_second": 0.412,
"eval_wer": 87.38337553324598,
"step": 2000
},
{
"epoch": 0.2443881245474294,
"grad_norm": 24.091772079467773,
"learning_rate": 6.6111111111111115e-06,
"loss": 1.3477,
"step": 2025
},
{
"epoch": 0.24740526188752113,
"grad_norm": 22.88396453857422,
"learning_rate": 6.555555555555556e-06,
"loss": 1.3089,
"step": 2050
},
{
"epoch": 0.25042239922761284,
"grad_norm": 17.0950984954834,
"learning_rate": 6.5000000000000004e-06,
"loss": 1.2598,
"step": 2075
},
{
"epoch": 0.25343953656770457,
"grad_norm": 19.088529586791992,
"learning_rate": 6.444444444444445e-06,
"loss": 1.2255,
"step": 2100
},
{
"epoch": 0.2564566739077963,
"grad_norm": 24.771873474121094,
"learning_rate": 6.3888888888888885e-06,
"loss": 1.2642,
"step": 2125
},
{
"epoch": 0.259473811247888,
"grad_norm": 25.191925048828125,
"learning_rate": 6.333333333333333e-06,
"loss": 1.2942,
"step": 2150
},
{
"epoch": 0.2624909485879797,
"grad_norm": 25.375642776489258,
"learning_rate": 6.277777777777778e-06,
"loss": 1.4181,
"step": 2175
},
{
"epoch": 0.26550808592807146,
"grad_norm": 21.1870059967041,
"learning_rate": 6.222222222222223e-06,
"loss": 1.304,
"step": 2200
},
{
"epoch": 0.2685252232681632,
"grad_norm": 25.293983459472656,
"learning_rate": 6.166666666666667e-06,
"loss": 1.2795,
"step": 2225
},
{
"epoch": 0.2715423606082549,
"grad_norm": 18.648513793945312,
"learning_rate": 6.111111111111112e-06,
"loss": 1.3909,
"step": 2250
},
{
"epoch": 0.2715423606082549,
"eval_cer": 69.14660914034863,
"eval_loss": 1.2956976890563965,
"eval_runtime": 4406.2547,
"eval_samples_per_second": 3.761,
"eval_steps_per_second": 0.47,
"eval_wer": 94.55374823893361,
"step": 2250
},
{
"epoch": 0.2745594979483466,
"grad_norm": 19.018657684326172,
"learning_rate": 6.055555555555555e-06,
"loss": 1.3042,
"step": 2275
},
{
"epoch": 0.27757663528843834,
"grad_norm": 21.156967163085938,
"learning_rate": 6e-06,
"loss": 1.2785,
"step": 2300
},
{
"epoch": 0.28059377262853,
"grad_norm": 18.3033390045166,
"learning_rate": 5.944444444444445e-06,
"loss": 1.2613,
"step": 2325
},
{
"epoch": 0.28361090996862176,
"grad_norm": 16.72675323486328,
"learning_rate": 5.88888888888889e-06,
"loss": 1.2575,
"step": 2350
},
{
"epoch": 0.2866280473087135,
"grad_norm": 28.087432861328125,
"learning_rate": 5.833333333333334e-06,
"loss": 1.3014,
"step": 2375
},
{
"epoch": 0.28964518464880523,
"grad_norm": 26.902196884155273,
"learning_rate": 5.777777777777778e-06,
"loss": 1.2205,
"step": 2400
},
{
"epoch": 0.2926623219888969,
"grad_norm": 23.583770751953125,
"learning_rate": 5.722222222222222e-06,
"loss": 1.256,
"step": 2425
},
{
"epoch": 0.29567945932898865,
"grad_norm": 24.659427642822266,
"learning_rate": 5.666666666666667e-06,
"loss": 1.3358,
"step": 2450
},
{
"epoch": 0.2986965966690804,
"grad_norm": 24.409543991088867,
"learning_rate": 5.611111111111112e-06,
"loss": 1.2786,
"step": 2475
},
{
"epoch": 0.3017137340091721,
"grad_norm": 23.712934494018555,
"learning_rate": 5.555555555555557e-06,
"loss": 1.3662,
"step": 2500
},
{
"epoch": 0.3017137340091721,
"eval_cer": 59.539550600782945,
"eval_loss": 1.282615065574646,
"eval_runtime": 5034.6376,
"eval_samples_per_second": 3.292,
"eval_steps_per_second": 0.412,
"eval_wer": 84.68093910404824,
"step": 2500
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 250,
"total_flos": 5.7717080064e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}