|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.95179987797437, |
|
"eval_steps": 100.0, |
|
"global_step": 32760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6101281269066504, |
|
"grad_norm": 50.660884857177734, |
|
"learning_rate": 1.188e-06, |
|
"loss": 40.3051, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.1284080132764343, |
|
"eval_loss": 14.975272178649902, |
|
"eval_runtime": 87.9488, |
|
"eval_samples_per_second": 77.773, |
|
"eval_steps_per_second": 9.722, |
|
"eval_wer": 1.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2196461256863942, |
|
"grad_norm": 66.72471618652344, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 15.5316, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8297742525930445, |
|
"grad_norm": 58.356754302978516, |
|
"learning_rate": 3.588e-06, |
|
"loss": 12.3062, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.1284376481744902, |
|
"eval_loss": 9.953412055969238, |
|
"eval_runtime": 86.9725, |
|
"eval_samples_per_second": 78.646, |
|
"eval_steps_per_second": 9.831, |
|
"eval_wer": 1.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.4392922513727884, |
|
"grad_norm": 37.668678283691406, |
|
"learning_rate": 4.788e-06, |
|
"loss": 9.0113, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 4.837707996368408, |
|
"eval_runtime": 90.6105, |
|
"eval_samples_per_second": 75.488, |
|
"eval_steps_per_second": 9.436, |
|
"eval_wer": 1.0, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.048810250152532, |
|
"grad_norm": 10.56650161743164, |
|
"learning_rate": 5.988e-06, |
|
"loss": 5.8841, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6589383770591826, |
|
"grad_norm": 1.526659369468689, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 3.9478, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.1284080132764343, |
|
"eval_loss": 3.201613426208496, |
|
"eval_runtime": 92.4896, |
|
"eval_samples_per_second": 73.954, |
|
"eval_steps_per_second": 9.244, |
|
"eval_wer": 1.0, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.268456375838926, |
|
"grad_norm": 1.9021031856536865, |
|
"learning_rate": 8.388e-06, |
|
"loss": 3.2121, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.878584502745577, |
|
"grad_norm": 2.086733102798462, |
|
"learning_rate": 9.588e-06, |
|
"loss": 2.7396, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 1.1284376481744902, |
|
"eval_loss": 2.4831321239471436, |
|
"eval_runtime": 89.7632, |
|
"eval_samples_per_second": 76.201, |
|
"eval_steps_per_second": 9.525, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.48810250152532, |
|
"grad_norm": 4.460277557373047, |
|
"learning_rate": 1.0787999999999999e-05, |
|
"loss": 2.2509, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 1.1053816974869606, |
|
"eval_loss": 0.9197890758514404, |
|
"eval_runtime": 86.71, |
|
"eval_samples_per_second": 78.884, |
|
"eval_steps_per_second": 9.86, |
|
"eval_wer": 0.9998538011695907, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 6.097620500305064, |
|
"grad_norm": 3.249329090118408, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 1.2958, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.707748627211714, |
|
"grad_norm": 3.6012656688690186, |
|
"learning_rate": 1.3188e-05, |
|
"loss": 0.6886, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 1.0272641062114747, |
|
"eval_loss": 0.3222425580024719, |
|
"eval_runtime": 89.592, |
|
"eval_samples_per_second": 76.346, |
|
"eval_steps_per_second": 9.543, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 7.317266625991458, |
|
"grad_norm": 3.2955262660980225, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 0.439, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.927394752898109, |
|
"grad_norm": 25.04276466369629, |
|
"learning_rate": 1.5588e-05, |
|
"loss": 0.318, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 1.0191441441441442, |
|
"eval_loss": 0.16009780764579773, |
|
"eval_runtime": 88.8354, |
|
"eval_samples_per_second": 76.996, |
|
"eval_steps_per_second": 9.625, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 8.536912751677852, |
|
"grad_norm": 2.714284896850586, |
|
"learning_rate": 1.6788e-05, |
|
"loss": 0.255, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 1.0176327643432908, |
|
"eval_loss": 0.09021047502756119, |
|
"eval_runtime": 89.3568, |
|
"eval_samples_per_second": 76.547, |
|
"eval_steps_per_second": 9.568, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 9.146430750457595, |
|
"grad_norm": 4.7839813232421875, |
|
"learning_rate": 1.7988e-05, |
|
"loss": 0.2207, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.756558877364247, |
|
"grad_norm": 2.4737026691436768, |
|
"learning_rate": 1.9188e-05, |
|
"loss": 0.1835, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 1.019736842105263, |
|
"eval_loss": 0.07156170159578323, |
|
"eval_runtime": 88.4936, |
|
"eval_samples_per_second": 77.294, |
|
"eval_steps_per_second": 9.662, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 10.36607687614399, |
|
"grad_norm": 6.031515121459961, |
|
"learning_rate": 2.0388e-05, |
|
"loss": 0.1691, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 10.97620500305064, |
|
"grad_norm": 3.6490681171417236, |
|
"learning_rate": 2.1588e-05, |
|
"loss": 0.16, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 1.017514224751067, |
|
"eval_loss": 0.04918373003602028, |
|
"eval_runtime": 88.5165, |
|
"eval_samples_per_second": 77.274, |
|
"eval_steps_per_second": 9.659, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 11.585723001830385, |
|
"grad_norm": 4.373748779296875, |
|
"learning_rate": 2.2788000000000003e-05, |
|
"loss": 0.1444, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 1.016151019440493, |
|
"eval_loss": 0.03436645492911339, |
|
"eval_runtime": 88.2172, |
|
"eval_samples_per_second": 77.536, |
|
"eval_steps_per_second": 9.692, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 12.195241000610128, |
|
"grad_norm": 0.14861944317817688, |
|
"learning_rate": 2.3988e-05, |
|
"loss": 0.1382, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.805369127516778, |
|
"grad_norm": 4.033771991729736, |
|
"learning_rate": 2.5188e-05, |
|
"loss": 0.1307, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 1.0176327643432908, |
|
"eval_loss": 0.038768209517002106, |
|
"eval_runtime": 95.1356, |
|
"eval_samples_per_second": 71.897, |
|
"eval_steps_per_second": 8.987, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 13.414887126296522, |
|
"grad_norm": 2.4692184925079346, |
|
"learning_rate": 2.63856e-05, |
|
"loss": 0.1346, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 1.0166548127074442, |
|
"eval_loss": 0.0276629775762558, |
|
"eval_runtime": 87.0897, |
|
"eval_samples_per_second": 78.54, |
|
"eval_steps_per_second": 9.817, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 14.024405125076266, |
|
"grad_norm": 3.765444278717041, |
|
"learning_rate": 2.7585600000000002e-05, |
|
"loss": 0.1287, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 14.634533251982916, |
|
"grad_norm": 1.3012793064117432, |
|
"learning_rate": 2.87856e-05, |
|
"loss": 0.1257, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 1.0164473684210527, |
|
"eval_loss": 0.02670404687523842, |
|
"eval_runtime": 92.4139, |
|
"eval_samples_per_second": 74.015, |
|
"eval_steps_per_second": 9.252, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 15.24405125076266, |
|
"grad_norm": 6.9578728675842285, |
|
"learning_rate": 2.99856e-05, |
|
"loss": 0.1276, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 15.854179377669311, |
|
"grad_norm": 3.3747990131378174, |
|
"learning_rate": 2.9956013070043084e-05, |
|
"loss": 0.1157, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 1.0165955429113325, |
|
"eval_loss": 0.02758130058646202, |
|
"eval_runtime": 89.1614, |
|
"eval_samples_per_second": 76.715, |
|
"eval_steps_per_second": 9.589, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 16.463697376449055, |
|
"grad_norm": 2.6571855545043945, |
|
"learning_rate": 2.9822174136311704e-05, |
|
"loss": 0.1104, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 1.0171586059743956, |
|
"eval_loss": 0.03412469103932381, |
|
"eval_runtime": 88.3916, |
|
"eval_samples_per_second": 77.383, |
|
"eval_steps_per_second": 9.673, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 17.0732153752288, |
|
"grad_norm": 0.15126635134220123, |
|
"learning_rate": 2.9599814696946643e-05, |
|
"loss": 0.1152, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 17.683343502135447, |
|
"grad_norm": 1.5255239009857178, |
|
"learning_rate": 2.9289379955813937e-05, |
|
"loss": 0.1086, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 1.016743717401612, |
|
"eval_loss": 0.023562652990221977, |
|
"eval_runtime": 99.8828, |
|
"eval_samples_per_second": 68.48, |
|
"eval_steps_per_second": 8.56, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 18.29286150091519, |
|
"grad_norm": 4.014769554138184, |
|
"learning_rate": 2.8893091974003682e-05, |
|
"loss": 0.1007, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 18.902989627821842, |
|
"grad_norm": 1.9409995079040527, |
|
"learning_rate": 2.84143727148899e-05, |
|
"loss": 0.1072, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 1.0175438596491229, |
|
"eval_loss": 0.030639806762337685, |
|
"eval_runtime": 84.6618, |
|
"eval_samples_per_second": 80.792, |
|
"eval_steps_per_second": 10.099, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 19.512507626601586, |
|
"grad_norm": 13.441529273986816, |
|
"learning_rate": 2.7855379321676933e-05, |
|
"loss": 0.128, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 1.0160324798482694, |
|
"eval_loss": 0.024129284545779228, |
|
"eval_runtime": 90.1768, |
|
"eval_samples_per_second": 75.851, |
|
"eval_steps_per_second": 9.481, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 20.12202562538133, |
|
"grad_norm": 2.291337490081787, |
|
"learning_rate": 2.7218107759869366e-05, |
|
"loss": 0.1062, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 20.73215375228798, |
|
"grad_norm": 4.577757835388184, |
|
"learning_rate": 2.650742754426605e-05, |
|
"loss": 0.0987, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 1.0165066382171646, |
|
"eval_loss": 0.02422538958489895, |
|
"eval_runtime": 97.4522, |
|
"eval_samples_per_second": 70.188, |
|
"eval_steps_per_second": 8.774, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 21.341671751067725, |
|
"grad_norm": 6.208291530609131, |
|
"learning_rate": 2.5727608573195923e-05, |
|
"loss": 0.1035, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 21.951799877974373, |
|
"grad_norm": 9.813406944274902, |
|
"learning_rate": 2.4883336143432908e-05, |
|
"loss": 0.1031, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 1.0159435751541015, |
|
"eval_loss": 0.02291141264140606, |
|
"eval_runtime": 85.7962, |
|
"eval_samples_per_second": 79.724, |
|
"eval_steps_per_second": 9.965, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 22.561317876754117, |
|
"grad_norm": 0.14426590502262115, |
|
"learning_rate": 2.3979682800065307e-05, |
|
"loss": 0.0903, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 1.016239924134661, |
|
"eval_loss": 0.02606978453695774, |
|
"eval_runtime": 103.6933, |
|
"eval_samples_per_second": 65.964, |
|
"eval_steps_per_second": 8.245, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 23.17083587553386, |
|
"grad_norm": 33.88288497924805, |
|
"learning_rate": 2.3024043139715204e-05, |
|
"loss": 0.0964, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 23.780964002440513, |
|
"grad_norm": 2.688063859939575, |
|
"learning_rate": 2.2018330526045242e-05, |
|
"loss": 0.0895, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 1.0176920341394025, |
|
"eval_loss": 0.03217785060405731, |
|
"eval_runtime": 89.5142, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 9.552, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 19680 |
|
}, |
|
{ |
|
"epoch": 24.390482001220256, |
|
"grad_norm": 1.0760211944580078, |
|
"learning_rate": 2.0970450483020733e-05, |
|
"loss": 0.0844, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.8288090229034424, |
|
"learning_rate": 1.9886698867971603e-05, |
|
"loss": 0.0835, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 1.0152027027027026, |
|
"eval_loss": 0.018799621611833572, |
|
"eval_runtime": 88.0548, |
|
"eval_samples_per_second": 77.679, |
|
"eval_steps_per_second": 9.71, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 25.610128126906652, |
|
"grad_norm": 2.2765700817108154, |
|
"learning_rate": 1.877358706127469e-05, |
|
"loss": 0.0744, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 1.0149063537221432, |
|
"eval_loss": 0.01787102408707142, |
|
"eval_runtime": 95.8003, |
|
"eval_samples_per_second": 71.399, |
|
"eval_steps_per_second": 8.925, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 21320 |
|
}, |
|
{ |
|
"epoch": 26.219646125686396, |
|
"grad_norm": 2.9212796688079834, |
|
"learning_rate": 1.7637802844774755e-05, |
|
"loss": 0.0738, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 26.829774252593044, |
|
"grad_norm": 3.6706600189208984, |
|
"learning_rate": 1.6486170220352805e-05, |
|
"loss": 0.0728, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 1.014521100047416, |
|
"eval_loss": 0.010682709515094757, |
|
"eval_runtime": 90.6475, |
|
"eval_samples_per_second": 75.457, |
|
"eval_steps_per_second": 9.432, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 22140 |
|
}, |
|
{ |
|
"epoch": 27.439292251372787, |
|
"grad_norm": 1.4717997312545776, |
|
"learning_rate": 1.5325608410059234e-05, |
|
"loss": 0.0704, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 1.0149063537221432, |
|
"eval_loss": 0.016120394691824913, |
|
"eval_runtime": 91.4543, |
|
"eval_samples_per_second": 74.791, |
|
"eval_steps_per_second": 9.349, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 22960 |
|
}, |
|
{ |
|
"epoch": 28.04881025015253, |
|
"grad_norm": 0.24731288850307465, |
|
"learning_rate": 1.4163090284146517e-05, |
|
"loss": 0.0651, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 28.658938377059183, |
|
"grad_norm": 1.4096115827560425, |
|
"learning_rate": 1.3007905796568247e-05, |
|
"loss": 0.068, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 1.0149656235182551, |
|
"eval_loss": 0.014005111530423164, |
|
"eval_runtime": 85.3622, |
|
"eval_samples_per_second": 80.129, |
|
"eval_steps_per_second": 10.016, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 23780 |
|
}, |
|
{ |
|
"epoch": 29.268456375838927, |
|
"grad_norm": 1.2507057189941406, |
|
"learning_rate": 1.1862367835867989e-05, |
|
"loss": 0.062, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 29.878584502745575, |
|
"grad_norm": 0.42576301097869873, |
|
"learning_rate": 1.073568135019168e-05, |
|
"loss": 0.0635, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 1.0147878141299194, |
|
"eval_loss": 0.017921432852745056, |
|
"eval_runtime": 86.6261, |
|
"eval_samples_per_second": 78.96, |
|
"eval_steps_per_second": 9.87, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 30.48810250152532, |
|
"grad_norm": 1.0933780670166016, |
|
"learning_rate": 9.634615680568962e-06, |
|
"loss": 0.0606, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 1.0146100047415836, |
|
"eval_loss": 0.016976628452539444, |
|
"eval_runtime": 85.312, |
|
"eval_samples_per_second": 80.176, |
|
"eval_steps_per_second": 10.022, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 31.097620500305062, |
|
"grad_norm": 2.098126173019409, |
|
"learning_rate": 8.56578623342252e-06, |
|
"loss": 0.0588, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 31.707748627211714, |
|
"grad_norm": 0.9397180080413818, |
|
"learning_rate": 7.535614733981355e-06, |
|
"loss": 0.0549, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 1.0146100047415836, |
|
"eval_loss": 0.012712378054857254, |
|
"eval_runtime": 78.6951, |
|
"eval_samples_per_second": 86.918, |
|
"eval_steps_per_second": 10.865, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 26240 |
|
}, |
|
{ |
|
"epoch": 32.31726662599146, |
|
"grad_norm": 3.3270745277404785, |
|
"learning_rate": 6.550290643366546e-06, |
|
"loss": 0.0545, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 32.92739475289811, |
|
"grad_norm": 1.39926016330719, |
|
"learning_rate": 5.615733971162722e-06, |
|
"loss": 0.0557, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 1.0145803698435278, |
|
"eval_loss": 0.011217311024665833, |
|
"eval_runtime": 76.5204, |
|
"eval_samples_per_second": 89.388, |
|
"eval_steps_per_second": 11.173, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 27060 |
|
}, |
|
{ |
|
"epoch": 33.53691275167785, |
|
"grad_norm": 0.6240960955619812, |
|
"learning_rate": 4.737559706904321e-06, |
|
"loss": 0.0525, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 1.0145507349454719, |
|
"eval_loss": 0.013999322429299355, |
|
"eval_runtime": 76.0714, |
|
"eval_samples_per_second": 89.915, |
|
"eval_steps_per_second": 11.239, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 27880 |
|
}, |
|
{ |
|
"epoch": 34.1464307504576, |
|
"grad_norm": 0.5480217933654785, |
|
"learning_rate": 3.924180691546633e-06, |
|
"loss": 0.0509, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 34.75655887736425, |
|
"grad_norm": 0.09108582139015198, |
|
"learning_rate": 3.1739539781329047e-06, |
|
"loss": 0.0478, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 1.0145507349454719, |
|
"eval_loss": 0.012541352771222591, |
|
"eval_runtime": 75.8184, |
|
"eval_samples_per_second": 90.216, |
|
"eval_steps_per_second": 11.277, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 35.36607687614399, |
|
"grad_norm": 5.189497470855713, |
|
"learning_rate": 2.4947803389966218e-06, |
|
"loss": 0.051, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 35.97620500305064, |
|
"grad_norm": 1.7753878831863403, |
|
"learning_rate": 1.8907403751213792e-06, |
|
"loss": 0.0475, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 1.0145803698435278, |
|
"eval_loss": 0.012446345761418343, |
|
"eval_runtime": 75.8714, |
|
"eval_samples_per_second": 90.153, |
|
"eval_steps_per_second": 11.269, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 29520 |
|
}, |
|
{ |
|
"epoch": 36.58572300183038, |
|
"grad_norm": 1.131529688835144, |
|
"learning_rate": 1.3654632704576153e-06, |
|
"loss": 0.0455, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 1.0141951161688003, |
|
"eval_loss": 0.011377622373402119, |
|
"eval_runtime": 76.0985, |
|
"eval_samples_per_second": 89.884, |
|
"eval_steps_per_second": 11.235, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 30340 |
|
}, |
|
{ |
|
"epoch": 37.195241000610125, |
|
"grad_norm": 0.012475825846195221, |
|
"learning_rate": 9.229080945215807e-07, |
|
"loss": 0.0502, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 37.80536912751678, |
|
"grad_norm": 0.05113031342625618, |
|
"learning_rate": 5.639609229205172e-07, |
|
"loss": 0.0444, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 1.0142247510668563, |
|
"eval_loss": 0.011828911490738392, |
|
"eval_runtime": 94.6786, |
|
"eval_samples_per_second": 72.244, |
|
"eval_steps_per_second": 9.031, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 31160 |
|
}, |
|
{ |
|
"epoch": 38.41488712629652, |
|
"grad_norm": 2.288602113723755, |
|
"learning_rate": 2.9174814715380303e-07, |
|
"loss": 0.0476, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 1.0141951161688003, |
|
"eval_loss": 0.011857305653393269, |
|
"eval_runtime": 74.477, |
|
"eval_samples_per_second": 91.84, |
|
"eval_steps_per_second": 11.48, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 31980 |
|
}, |
|
{ |
|
"epoch": 39.024405125076264, |
|
"grad_norm": 1.6855770349502563, |
|
"learning_rate": 1.0790527198271116e-07, |
|
"loss": 0.0451, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 39.634533251982916, |
|
"grad_norm": 2.309912919998169, |
|
"learning_rate": 1.3536859442666582e-08, |
|
"loss": 0.0464, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 39.95179987797437, |
|
"eval_cer": 1.0143432906590801, |
|
"eval_loss": 0.011460067704319954, |
|
"eval_runtime": 89.8642, |
|
"eval_samples_per_second": 76.115, |
|
"eval_steps_per_second": 9.514, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 32760 |
|
}, |
|
{ |
|
"epoch": 39.95179987797437, |
|
"step": 32760, |
|
"total_flos": 2.3262081232451936e+19, |
|
"train_loss": 1.5701773451070355, |
|
"train_runtime": 35107.1781, |
|
"train_samples_per_second": 59.748, |
|
"train_steps_per_second": 0.933 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3262081232451936e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|