|
{
  "best_metric": 81.65732357316676,
  "best_model_checkpoint": "/scratch/p310333/whisper-small-dialect_all_seed84/checkpoint-1750",
  "epoch": 0.3017137340091721,
  "eval_steps": 250,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003017137340091721,
      "grad_norm": 65.54208374023438,
      "learning_rate": 5.000000000000001e-07,
      "loss": 4.9187,
      "step": 25
    },
    {
      "epoch": 0.006034274680183442,
      "grad_norm": 30.615581512451172,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 4.0023,
      "step": 50
    },
    {
      "epoch": 0.009051412020275163,
      "grad_norm": 29.282546997070312,
      "learning_rate": 1.5e-06,
      "loss": 3.4756,
      "step": 75
    },
    {
      "epoch": 0.012068549360366883,
      "grad_norm": 28.80223846435547,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.7255,
      "step": 100
    },
    {
      "epoch": 0.015085686700458605,
      "grad_norm": 32.79741287231445,
      "learning_rate": 2.5e-06,
      "loss": 2.4196,
      "step": 125
    },
    {
      "epoch": 0.018102824040550327,
      "grad_norm": 23.08782196044922,
      "learning_rate": 3e-06,
      "loss": 2.2985,
      "step": 150
    },
    {
      "epoch": 0.021119961380642045,
      "grad_norm": 26.61124610900879,
      "learning_rate": 3.5e-06,
      "loss": 2.3417,
      "step": 175
    },
    {
      "epoch": 0.024137098720733767,
      "grad_norm": 19.468734741210938,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.1087,
      "step": 200
    },
    {
      "epoch": 0.02715423606082549,
      "grad_norm": 33.279293060302734,
      "learning_rate": 4.5e-06,
      "loss": 2.0878,
      "step": 225
    },
    {
      "epoch": 0.03017137340091721,
      "grad_norm": 27.024686813354492,
      "learning_rate": 5e-06,
      "loss": 2.1126,
      "step": 250
    },
    {
      "epoch": 0.03017137340091721,
      "eval_cer": 75.39939054285433,
      "eval_loss": 1.9080588817596436,
      "eval_runtime": 4890.5649,
      "eval_samples_per_second": 3.389,
      "eval_steps_per_second": 0.424,
      "eval_wer": 89.64246657668397,
      "step": 250
    },
    {
      "epoch": 0.03318851074100893,
      "grad_norm": 24.560420989990234,
      "learning_rate": 5.500000000000001e-06,
      "loss": 1.8494,
      "step": 275
    },
    {
      "epoch": 0.036205648081100654,
      "grad_norm": 24.324853897094727,
      "learning_rate": 6e-06,
      "loss": 1.8878,
      "step": 300
    },
    {
      "epoch": 0.039222785421192376,
      "grad_norm": 28.385276794433594,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 1.7333,
      "step": 325
    },
    {
      "epoch": 0.04223992276128409,
      "grad_norm": 27.999126434326172,
      "learning_rate": 7e-06,
      "loss": 1.6565,
      "step": 350
    },
    {
      "epoch": 0.04525706010137581,
      "grad_norm": 29.16621208190918,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.4705,
      "step": 375
    },
    {
      "epoch": 0.048274197441467534,
      "grad_norm": 21.424270629882812,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.3766,
      "step": 400
    },
    {
      "epoch": 0.051291334781559256,
      "grad_norm": 25.499229431152344,
      "learning_rate": 8.5e-06,
      "loss": 1.3916,
      "step": 425
    },
    {
      "epoch": 0.05430847212165098,
      "grad_norm": 30.007627487182617,
      "learning_rate": 9e-06,
      "loss": 1.4213,
      "step": 450
    },
    {
      "epoch": 0.0573256094617427,
      "grad_norm": 30.537519454956055,
      "learning_rate": 9.5e-06,
      "loss": 1.417,
      "step": 475
    },
    {
      "epoch": 0.06034274680183442,
      "grad_norm": 26.885221481323242,
      "learning_rate": 1e-05,
      "loss": 1.4094,
      "step": 500
    },
    {
      "epoch": 0.06034274680183442,
      "eval_cer": 63.82253332025917,
      "eval_loss": 1.4668316841125488,
      "eval_runtime": 4839.8999,
      "eval_samples_per_second": 3.424,
      "eval_steps_per_second": 0.428,
      "eval_wer": 91.40993684791283,
      "step": 500
    },
    {
      "epoch": 0.06335988414192614,
      "grad_norm": 28.894699096679688,
      "learning_rate": 9.944444444444445e-06,
      "loss": 1.5314,
      "step": 525
    },
    {
      "epoch": 0.06637702148201786,
      "grad_norm": 24.584243774414062,
      "learning_rate": 9.88888888888889e-06,
      "loss": 1.3876,
      "step": 550
    },
    {
      "epoch": 0.06939415882210959,
      "grad_norm": 23.92827033996582,
      "learning_rate": 9.833333333333333e-06,
      "loss": 1.3787,
      "step": 575
    },
    {
      "epoch": 0.07241129616220131,
      "grad_norm": 22.209672927856445,
      "learning_rate": 9.777777777777779e-06,
      "loss": 1.5477,
      "step": 600
    },
    {
      "epoch": 0.07542843350229303,
      "grad_norm": 24.50571632385254,
      "learning_rate": 9.722222222222223e-06,
      "loss": 1.3358,
      "step": 625
    },
    {
      "epoch": 0.07844557084238475,
      "grad_norm": 25.96898078918457,
      "learning_rate": 9.666666666666667e-06,
      "loss": 1.4117,
      "step": 650
    },
    {
      "epoch": 0.08146270818247647,
      "grad_norm": 24.520370483398438,
      "learning_rate": 9.611111111111112e-06,
      "loss": 1.4163,
      "step": 675
    },
    {
      "epoch": 0.08447984552256818,
      "grad_norm": 28.337772369384766,
      "learning_rate": 9.555555555555556e-06,
      "loss": 1.4381,
      "step": 700
    },
    {
      "epoch": 0.0874969828626599,
      "grad_norm": 22.536033630371094,
      "learning_rate": 9.5e-06,
      "loss": 1.4022,
      "step": 725
    },
    {
      "epoch": 0.09051412020275162,
      "grad_norm": 23.963077545166016,
      "learning_rate": 9.444444444444445e-06,
      "loss": 1.4845,
      "step": 750
    },
    {
      "epoch": 0.09051412020275162,
      "eval_cer": 61.253460253286775,
      "eval_loss": 1.4192742109298706,
      "eval_runtime": 4048.8139,
      "eval_samples_per_second": 4.093,
      "eval_steps_per_second": 0.512,
      "eval_wer": 89.38631146491167,
      "step": 750
    },
    {
      "epoch": 0.09353125754284335,
      "grad_norm": 21.442434310913086,
      "learning_rate": 9.38888888888889e-06,
      "loss": 1.3301,
      "step": 775
    },
    {
      "epoch": 0.09654839488293507,
      "grad_norm": 18.17827606201172,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.3823,
      "step": 800
    },
    {
      "epoch": 0.09956553222302679,
      "grad_norm": 23.526996612548828,
      "learning_rate": 9.277777777777778e-06,
      "loss": 1.5849,
      "step": 825
    },
    {
      "epoch": 0.10258266956311851,
      "grad_norm": 21.815263748168945,
      "learning_rate": 9.222222222222224e-06,
      "loss": 1.3643,
      "step": 850
    },
    {
      "epoch": 0.10559980690321023,
      "grad_norm": 21.027591705322266,
      "learning_rate": 9.166666666666666e-06,
      "loss": 1.3723,
      "step": 875
    },
    {
      "epoch": 0.10861694424330195,
      "grad_norm": 26.622665405273438,
      "learning_rate": 9.111111111111112e-06,
      "loss": 1.4118,
      "step": 900
    },
    {
      "epoch": 0.11163408158339368,
      "grad_norm": 25.46664047241211,
      "learning_rate": 9.055555555555556e-06,
      "loss": 1.3537,
      "step": 925
    },
    {
      "epoch": 0.1146512189234854,
      "grad_norm": 21.33067512512207,
      "learning_rate": 9e-06,
      "loss": 1.3697,
      "step": 950
    },
    {
      "epoch": 0.11766835626357712,
      "grad_norm": 27.155698776245117,
      "learning_rate": 8.944444444444446e-06,
      "loss": 1.4537,
      "step": 975
    },
    {
      "epoch": 0.12068549360366884,
      "grad_norm": 27.02322769165039,
      "learning_rate": 8.888888888888888e-06,
      "loss": 1.4639,
      "step": 1000
    },
    {
      "epoch": 0.12068549360366884,
      "eval_cer": 62.40448946649872,
      "eval_loss": 1.3861624002456665,
      "eval_runtime": 6536.9994,
      "eval_samples_per_second": 2.535,
      "eval_steps_per_second": 0.317,
      "eval_wer": 91.8197850267485,
      "step": 1000
    },
    {
      "epoch": 0.12370263094376056,
      "grad_norm": 24.13388442993164,
      "learning_rate": 8.833333333333334e-06,
      "loss": 1.4738,
      "step": 1025
    },
    {
      "epoch": 0.12671976828385229,
      "grad_norm": 26.072269439697266,
      "learning_rate": 8.777777777777778e-06,
      "loss": 1.4402,
      "step": 1050
    },
    {
      "epoch": 0.129736905623944,
      "grad_norm": 31.587852478027344,
      "learning_rate": 8.722222222222224e-06,
      "loss": 1.3485,
      "step": 1075
    },
    {
      "epoch": 0.13275404296403573,
      "grad_norm": 23.130081176757812,
      "learning_rate": 8.666666666666668e-06,
      "loss": 1.3384,
      "step": 1100
    },
    {
      "epoch": 0.13577118030412744,
      "grad_norm": 27.463407516479492,
      "learning_rate": 8.611111111111112e-06,
      "loss": 1.4537,
      "step": 1125
    },
    {
      "epoch": 0.13878831764421917,
      "grad_norm": 20.881338119506836,
      "learning_rate": 8.555555555555556e-06,
      "loss": 1.3062,
      "step": 1150
    },
    {
      "epoch": 0.14180545498431088,
      "grad_norm": 26.432994842529297,
      "learning_rate": 8.5e-06,
      "loss": 1.3982,
      "step": 1175
    },
    {
      "epoch": 0.14482259232440262,
      "grad_norm": 18.55461311340332,
      "learning_rate": 8.444444444444446e-06,
      "loss": 1.2873,
      "step": 1200
    },
    {
      "epoch": 0.14783972966449432,
      "grad_norm": 23.862037658691406,
      "learning_rate": 8.38888888888889e-06,
      "loss": 1.3043,
      "step": 1225
    },
    {
      "epoch": 0.15085686700458606,
      "grad_norm": 27.1133975982666,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.3855,
      "step": 1250
    },
    {
      "epoch": 0.15085686700458606,
      "eval_cer": 66.67634657595208,
      "eval_loss": 1.367380976676941,
      "eval_runtime": 5424.8274,
      "eval_samples_per_second": 3.055,
      "eval_steps_per_second": 0.382,
      "eval_wer": 93.20499305425562,
      "step": 1250
    },
    {
      "epoch": 0.15387400434467777,
      "grad_norm": 24.38422393798828,
      "learning_rate": 8.277777777777778e-06,
      "loss": 1.4679,
      "step": 1275
    },
    {
      "epoch": 0.1568911416847695,
      "grad_norm": 23.12870216369629,
      "learning_rate": 8.222222222222222e-06,
      "loss": 1.4213,
      "step": 1300
    },
    {
      "epoch": 0.1599082790248612,
      "grad_norm": 24.16248321533203,
      "learning_rate": 8.166666666666668e-06,
      "loss": 1.2921,
      "step": 1325
    },
    {
      "epoch": 0.16292541636495295,
      "grad_norm": 22.89928436279297,
      "learning_rate": 8.111111111111112e-06,
      "loss": 1.3745,
      "step": 1350
    },
    {
      "epoch": 0.16594255370504465,
      "grad_norm": 26.103015899658203,
      "learning_rate": 8.055555555555557e-06,
      "loss": 1.3796,
      "step": 1375
    },
    {
      "epoch": 0.16895969104513636,
      "grad_norm": 17.778417587280273,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.3638,
      "step": 1400
    },
    {
      "epoch": 0.1719768283852281,
      "grad_norm": 23.057931900024414,
      "learning_rate": 7.944444444444445e-06,
      "loss": 1.3524,
      "step": 1425
    },
    {
      "epoch": 0.1749939657253198,
      "grad_norm": 20.255752563476562,
      "learning_rate": 7.88888888888889e-06,
      "loss": 1.3768,
      "step": 1450
    },
    {
      "epoch": 0.17801110306541154,
      "grad_norm": 19.22992706298828,
      "learning_rate": 7.833333333333333e-06,
      "loss": 1.247,
      "step": 1475
    },
    {
      "epoch": 0.18102824040550325,
      "grad_norm": 23.74711036682129,
      "learning_rate": 7.77777777777778e-06,
      "loss": 1.3741,
      "step": 1500
    },
    {
      "epoch": 0.18102824040550325,
      "eval_cer": 58.32365342404792,
      "eval_loss": 1.3499553203582764,
      "eval_runtime": 4123.7168,
      "eval_samples_per_second": 4.019,
      "eval_steps_per_second": 0.502,
      "eval_wer": 83.93414843203514,
      "step": 1500
    },
    {
      "epoch": 0.18404537774559498,
      "grad_norm": 22.509279251098633,
      "learning_rate": 7.722222222222223e-06,
      "loss": 1.2904,
      "step": 1525
    },
    {
      "epoch": 0.1870625150856867,
      "grad_norm": 22.517881393432617,
      "learning_rate": 7.666666666666667e-06,
      "loss": 1.3,
      "step": 1550
    },
    {
      "epoch": 0.19007965242577843,
      "grad_norm": 26.30403709411621,
      "learning_rate": 7.611111111111111e-06,
      "loss": 1.3627,
      "step": 1575
    },
    {
      "epoch": 0.19309678976587014,
      "grad_norm": 20.28595542907715,
      "learning_rate": 7.555555555555556e-06,
      "loss": 1.2716,
      "step": 1600
    },
    {
      "epoch": 0.19611392710596187,
      "grad_norm": 22.23461151123047,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.4848,
      "step": 1625
    },
    {
      "epoch": 0.19913106444605358,
      "grad_norm": 23.31128692626953,
      "learning_rate": 7.444444444444445e-06,
      "loss": 1.2453,
      "step": 1650
    },
    {
      "epoch": 0.20214820178614532,
      "grad_norm": 23.57061004638672,
      "learning_rate": 7.38888888888889e-06,
      "loss": 1.3575,
      "step": 1675
    },
    {
      "epoch": 0.20516533912623702,
      "grad_norm": 19.259546279907227,
      "learning_rate": 7.333333333333333e-06,
      "loss": 1.2894,
      "step": 1700
    },
    {
      "epoch": 0.20818247646632876,
      "grad_norm": 25.171783447265625,
      "learning_rate": 7.277777777777778e-06,
      "loss": 1.3894,
      "step": 1725
    },
    {
      "epoch": 0.21119961380642047,
      "grad_norm": 21.088159561157227,
      "learning_rate": 7.222222222222223e-06,
      "loss": 1.3073,
      "step": 1750
    },
    {
      "epoch": 0.21119961380642047,
      "eval_cer": 58.877671089253795,
      "eval_loss": 1.326649785041809,
      "eval_runtime": 4478.8906,
      "eval_samples_per_second": 3.7,
      "eval_steps_per_second": 0.463,
      "eval_wer": 81.65732357316676,
      "step": 1750
    },
    {
      "epoch": 0.2142167511465122,
      "grad_norm": 23.551097869873047,
      "learning_rate": 7.166666666666667e-06,
      "loss": 1.3203,
      "step": 1775
    },
    {
      "epoch": 0.2172338884866039,
      "grad_norm": 25.782739639282227,
      "learning_rate": 7.111111111111112e-06,
      "loss": 1.2752,
      "step": 1800
    },
    {
      "epoch": 0.22025102582669562,
      "grad_norm": 16.879140853881836,
      "learning_rate": 7.055555555555557e-06,
      "loss": 1.2145,
      "step": 1825
    },
    {
      "epoch": 0.22326816316678735,
      "grad_norm": 22.139205932617188,
      "learning_rate": 7e-06,
      "loss": 1.2678,
      "step": 1850
    },
    {
      "epoch": 0.22628530050687906,
      "grad_norm": 15.391493797302246,
      "learning_rate": 6.944444444444445e-06,
      "loss": 1.2468,
      "step": 1875
    },
    {
      "epoch": 0.2293024378469708,
      "grad_norm": 20.986146926879883,
      "learning_rate": 6.88888888888889e-06,
      "loss": 1.2093,
      "step": 1900
    },
    {
      "epoch": 0.2323195751870625,
      "grad_norm": 27.949708938598633,
      "learning_rate": 6.833333333333334e-06,
      "loss": 1.2795,
      "step": 1925
    },
    {
      "epoch": 0.23533671252715424,
      "grad_norm": 21.94474220275879,
      "learning_rate": 6.777777777777779e-06,
      "loss": 1.2931,
      "step": 1950
    },
    {
      "epoch": 0.23835384986724595,
      "grad_norm": 22.343372344970703,
      "learning_rate": 6.7222222222222235e-06,
      "loss": 1.2731,
      "step": 1975
    },
    {
      "epoch": 0.24137098720733768,
      "grad_norm": 21.14777374267578,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.2993,
      "step": 2000
    },
    {
      "epoch": 0.24137098720733768,
      "eval_cer": 59.81580515474027,
      "eval_loss": 1.3122555017471313,
      "eval_runtime": 5034.5037,
      "eval_samples_per_second": 3.292,
      "eval_steps_per_second": 0.412,
      "eval_wer": 87.38337553324598,
      "step": 2000
    },
    {
      "epoch": 0.2443881245474294,
      "grad_norm": 24.091772079467773,
      "learning_rate": 6.6111111111111115e-06,
      "loss": 1.3477,
      "step": 2025
    },
    {
      "epoch": 0.24740526188752113,
      "grad_norm": 22.88396453857422,
      "learning_rate": 6.555555555555556e-06,
      "loss": 1.3089,
      "step": 2050
    },
    {
      "epoch": 0.25042239922761284,
      "grad_norm": 17.0950984954834,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 1.2598,
      "step": 2075
    },
    {
      "epoch": 0.25343953656770457,
      "grad_norm": 19.088529586791992,
      "learning_rate": 6.444444444444445e-06,
      "loss": 1.2255,
      "step": 2100
    },
    {
      "epoch": 0.2564566739077963,
      "grad_norm": 24.771873474121094,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 1.2642,
      "step": 2125
    },
    {
      "epoch": 0.259473811247888,
      "grad_norm": 25.191925048828125,
      "learning_rate": 6.333333333333333e-06,
      "loss": 1.2942,
      "step": 2150
    },
    {
      "epoch": 0.2624909485879797,
      "grad_norm": 25.375642776489258,
      "learning_rate": 6.277777777777778e-06,
      "loss": 1.4181,
      "step": 2175
    },
    {
      "epoch": 0.26550808592807146,
      "grad_norm": 21.1870059967041,
      "learning_rate": 6.222222222222223e-06,
      "loss": 1.304,
      "step": 2200
    },
    {
      "epoch": 0.2685252232681632,
      "grad_norm": 25.293983459472656,
      "learning_rate": 6.166666666666667e-06,
      "loss": 1.2795,
      "step": 2225
    },
    {
      "epoch": 0.2715423606082549,
      "grad_norm": 18.648513793945312,
      "learning_rate": 6.111111111111112e-06,
      "loss": 1.3909,
      "step": 2250
    },
    {
      "epoch": 0.2715423606082549,
      "eval_cer": 69.14660914034863,
      "eval_loss": 1.2956976890563965,
      "eval_runtime": 4406.2547,
      "eval_samples_per_second": 3.761,
      "eval_steps_per_second": 0.47,
      "eval_wer": 94.55374823893361,
      "step": 2250
    },
    {
      "epoch": 0.2745594979483466,
      "grad_norm": 19.018657684326172,
      "learning_rate": 6.055555555555555e-06,
      "loss": 1.3042,
      "step": 2275
    },
    {
      "epoch": 0.27757663528843834,
      "grad_norm": 21.156967163085938,
      "learning_rate": 6e-06,
      "loss": 1.2785,
      "step": 2300
    },
    {
      "epoch": 0.28059377262853,
      "grad_norm": 18.3033390045166,
      "learning_rate": 5.944444444444445e-06,
      "loss": 1.2613,
      "step": 2325
    },
    {
      "epoch": 0.28361090996862176,
      "grad_norm": 16.72675323486328,
      "learning_rate": 5.88888888888889e-06,
      "loss": 1.2575,
      "step": 2350
    },
    {
      "epoch": 0.2866280473087135,
      "grad_norm": 28.087432861328125,
      "learning_rate": 5.833333333333334e-06,
      "loss": 1.3014,
      "step": 2375
    },
    {
      "epoch": 0.28964518464880523,
      "grad_norm": 26.902196884155273,
      "learning_rate": 5.777777777777778e-06,
      "loss": 1.2205,
      "step": 2400
    },
    {
      "epoch": 0.2926623219888969,
      "grad_norm": 23.583770751953125,
      "learning_rate": 5.722222222222222e-06,
      "loss": 1.256,
      "step": 2425
    },
    {
      "epoch": 0.29567945932898865,
      "grad_norm": 24.659427642822266,
      "learning_rate": 5.666666666666667e-06,
      "loss": 1.3358,
      "step": 2450
    },
    {
      "epoch": 0.2986965966690804,
      "grad_norm": 24.409543991088867,
      "learning_rate": 5.611111111111112e-06,
      "loss": 1.2786,
      "step": 2475
    },
    {
      "epoch": 0.3017137340091721,
      "grad_norm": 23.712934494018555,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.3662,
      "step": 2500
    },
    {
      "epoch": 0.3017137340091721,
      "eval_cer": 59.539550600782945,
      "eval_loss": 1.282615065574646,
      "eval_runtime": 5034.6376,
      "eval_samples_per_second": 3.292,
      "eval_steps_per_second": 0.412,
      "eval_wer": 84.68093910404824,
      "step": 2500
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 250,
  "total_flos": 5.7717080064e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|