|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.95179987797437, |
|
"eval_steps": 100.0, |
|
"global_step": 32760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6101281269066504, |
|
"grad_norm": 55.342838287353516, |
|
"learning_rate": 1.1904e-06, |
|
"loss": 29.232, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 11.350740432739258, |
|
"eval_runtime": 78.3217, |
|
"eval_samples_per_second": 87.332, |
|
"eval_steps_per_second": 10.917, |
|
"eval_wer": 1.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2196461256863942, |
|
"grad_norm": 47.82231903076172, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 11.653, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8297742525930445, |
|
"grad_norm": 40.30710220336914, |
|
"learning_rate": 3.588e-06, |
|
"loss": 9.4623, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.1284080132764343, |
|
"eval_loss": 7.82855224609375, |
|
"eval_runtime": 75.8772, |
|
"eval_samples_per_second": 90.146, |
|
"eval_steps_per_second": 11.268, |
|
"eval_wer": 1.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.4392922513727884, |
|
"grad_norm": 20.09738540649414, |
|
"learning_rate": 4.788e-06, |
|
"loss": 7.1817, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.1284080132764343, |
|
"eval_loss": 4.291374683380127, |
|
"eval_runtime": 70.7133, |
|
"eval_samples_per_second": 96.729, |
|
"eval_steps_per_second": 12.091, |
|
"eval_wer": 1.0, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.048810250152532, |
|
"grad_norm": 11.131582260131836, |
|
"learning_rate": 5.988e-06, |
|
"loss": 4.9943, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6589383770591826, |
|
"grad_norm": 3.2968924045562744, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 3.6746, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.1284080132764343, |
|
"eval_loss": 2.9958813190460205, |
|
"eval_runtime": 70.8535, |
|
"eval_samples_per_second": 96.537, |
|
"eval_steps_per_second": 12.067, |
|
"eval_wer": 1.0, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.268456375838926, |
|
"grad_norm": 1.5761828422546387, |
|
"learning_rate": 8.388e-06, |
|
"loss": 3.0354, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.878584502745577, |
|
"grad_norm": 2.042921543121338, |
|
"learning_rate": 9.588e-06, |
|
"loss": 2.6344, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 1.1284376481744902, |
|
"eval_loss": 2.379577159881592, |
|
"eval_runtime": 71.7131, |
|
"eval_samples_per_second": 95.38, |
|
"eval_steps_per_second": 11.923, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.48810250152532, |
|
"grad_norm": 4.554098606109619, |
|
"learning_rate": 1.0787999999999999e-05, |
|
"loss": 2.224, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 1.0735538169748695, |
|
"eval_loss": 1.1762758493423462, |
|
"eval_runtime": 74.8637, |
|
"eval_samples_per_second": 91.366, |
|
"eval_steps_per_second": 11.421, |
|
"eval_wer": 1.0, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 6.097620500305064, |
|
"grad_norm": 3.2407827377319336, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 1.5426, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.707748627211714, |
|
"grad_norm": 8.200541496276855, |
|
"learning_rate": 1.3188e-05, |
|
"loss": 0.8482, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.9927098150782361, |
|
"eval_loss": 0.46800941228866577, |
|
"eval_runtime": 71.6177, |
|
"eval_samples_per_second": 95.507, |
|
"eval_steps_per_second": 11.938, |
|
"eval_wer": 0.9998538011695907, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 7.317266625991458, |
|
"grad_norm": 3.0007307529449463, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 0.5869, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.927394752898109, |
|
"grad_norm": 4.457747459411621, |
|
"learning_rate": 1.5588e-05, |
|
"loss": 0.4357, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.9734471313418682, |
|
"eval_loss": 0.27095767855644226, |
|
"eval_runtime": 76.0314, |
|
"eval_samples_per_second": 89.963, |
|
"eval_steps_per_second": 11.245, |
|
"eval_wer": 0.9992690058479532, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 8.536912751677852, |
|
"grad_norm": 6.115583419799805, |
|
"learning_rate": 1.6788e-05, |
|
"loss": 0.3568, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 1.0188477951635846, |
|
"eval_loss": 0.1256408840417862, |
|
"eval_runtime": 71.254, |
|
"eval_samples_per_second": 95.995, |
|
"eval_steps_per_second": 11.999, |
|
"eval_wer": 0.999561403508772, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 9.146430750457595, |
|
"grad_norm": 4.245608806610107, |
|
"learning_rate": 1.7988e-05, |
|
"loss": 0.2776, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.756558877364247, |
|
"grad_norm": 5.325539588928223, |
|
"learning_rate": 1.9188e-05, |
|
"loss": 0.2374, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 1.0219001896633475, |
|
"eval_loss": 0.14763644337654114, |
|
"eval_runtime": 73.2557, |
|
"eval_samples_per_second": 93.372, |
|
"eval_steps_per_second": 11.671, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 10.36607687614399, |
|
"grad_norm": 5.118485450744629, |
|
"learning_rate": 2.0388e-05, |
|
"loss": 0.2199, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 10.97620500305064, |
|
"grad_norm": 7.805136203765869, |
|
"learning_rate": 2.1588e-05, |
|
"loss": 0.1941, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 1.018669985775249, |
|
"eval_loss": 0.059657976031303406, |
|
"eval_runtime": 72.2722, |
|
"eval_samples_per_second": 94.642, |
|
"eval_steps_per_second": 11.83, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 11.585723001830385, |
|
"grad_norm": 8.909073829650879, |
|
"learning_rate": 2.2788000000000003e-05, |
|
"loss": 0.1829, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 1.0188477951635846, |
|
"eval_loss": 0.046978700906038284, |
|
"eval_runtime": 70.6222, |
|
"eval_samples_per_second": 96.853, |
|
"eval_steps_per_second": 12.107, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 12.195241000610128, |
|
"grad_norm": 2.5103747844696045, |
|
"learning_rate": 2.3988e-05, |
|
"loss": 0.1802, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.805369127516778, |
|
"grad_norm": 3.005159378051758, |
|
"learning_rate": 2.5188e-05, |
|
"loss": 0.1701, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 1.018136557610242, |
|
"eval_loss": 0.05100085958838463, |
|
"eval_runtime": 70.6009, |
|
"eval_samples_per_second": 96.883, |
|
"eval_steps_per_second": 12.11, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 13.414887126296522, |
|
"grad_norm": 8.558382987976074, |
|
"learning_rate": 2.6388000000000002e-05, |
|
"loss": 0.1603, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 1.0192626837363679, |
|
"eval_loss": 0.037522342056035995, |
|
"eval_runtime": 70.9174, |
|
"eval_samples_per_second": 96.45, |
|
"eval_steps_per_second": 12.056, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 14.024405125076266, |
|
"grad_norm": 0.5282774567604065, |
|
"learning_rate": 2.7588e-05, |
|
"loss": 0.1579, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 14.634533251982916, |
|
"grad_norm": 6.527963161468506, |
|
"learning_rate": 2.8788e-05, |
|
"loss": 0.1564, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 1.0064011379800855, |
|
"eval_loss": 0.0865325778722763, |
|
"eval_runtime": 72.0213, |
|
"eval_samples_per_second": 94.972, |
|
"eval_steps_per_second": 11.871, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 15.24405125076266, |
|
"grad_norm": 3.7430195808410645, |
|
"learning_rate": 2.9988e-05, |
|
"loss": 0.1632, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 15.854179377669311, |
|
"grad_norm": 3.735182762145996, |
|
"learning_rate": 2.9956013070043084e-05, |
|
"loss": 0.1555, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 1.0171882408724513, |
|
"eval_loss": 0.0239888783544302, |
|
"eval_runtime": 79.7324, |
|
"eval_samples_per_second": 85.787, |
|
"eval_steps_per_second": 10.723, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 16.463697376449055, |
|
"grad_norm": 2.248210906982422, |
|
"learning_rate": 2.9822174136311704e-05, |
|
"loss": 0.1446, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 1.0184329065908013, |
|
"eval_loss": 0.03877296671271324, |
|
"eval_runtime": 70.4641, |
|
"eval_samples_per_second": 97.071, |
|
"eval_steps_per_second": 12.134, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 17.0732153752288, |
|
"grad_norm": 1.9990071058273315, |
|
"learning_rate": 2.9599280835811145e-05, |
|
"loss": 0.1432, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 17.683343502135447, |
|
"grad_norm": 1.6087830066680908, |
|
"learning_rate": 2.9289379955813937e-05, |
|
"loss": 0.1374, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 1.0229077761972498, |
|
"eval_loss": 0.07236260920763016, |
|
"eval_runtime": 72.8414, |
|
"eval_samples_per_second": 93.903, |
|
"eval_steps_per_second": 11.738, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 18.29286150091519, |
|
"grad_norm": 1.2661323547363281, |
|
"learning_rate": 2.8893091974003682e-05, |
|
"loss": 0.1271, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 18.902989627821842, |
|
"grad_norm": 2.638475179672241, |
|
"learning_rate": 2.841333172308954e-05, |
|
"loss": 0.1358, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 1.016802987197724, |
|
"eval_loss": 0.029252657666802406, |
|
"eval_runtime": 79.4655, |
|
"eval_samples_per_second": 86.075, |
|
"eval_steps_per_second": 10.759, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 19.512507626601586, |
|
"grad_norm": 3.171802043914795, |
|
"learning_rate": 2.785298169149414e-05, |
|
"loss": 0.126, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 1.0173364153627311, |
|
"eval_loss": 0.02503075823187828, |
|
"eval_runtime": 82.6447, |
|
"eval_samples_per_second": 82.764, |
|
"eval_steps_per_second": 10.345, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 20.12202562538133, |
|
"grad_norm": 2.895329236984253, |
|
"learning_rate": 2.7215408565964914e-05, |
|
"loss": 0.1259, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 20.73215375228798, |
|
"grad_norm": 8.28530502319336, |
|
"learning_rate": 2.650444300389672e-05, |
|
"loss": 0.1238, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 1.017366050260787, |
|
"eval_loss": 0.03371915966272354, |
|
"eval_runtime": 80.9247, |
|
"eval_samples_per_second": 84.523, |
|
"eval_steps_per_second": 10.565, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 21.341671751067725, |
|
"grad_norm": 10.950956344604492, |
|
"learning_rate": 2.5724356618032884e-05, |
|
"loss": 0.1193, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 21.951799877974373, |
|
"grad_norm": 2.780343770980835, |
|
"learning_rate": 2.4879836311824927e-05, |
|
"loss": 0.1106, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 1.0159139402560455, |
|
"eval_loss": 0.018149759620428085, |
|
"eval_runtime": 71.3853, |
|
"eval_samples_per_second": 95.818, |
|
"eval_steps_per_second": 11.977, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 22.561317876754117, |
|
"grad_norm": 1.4264533519744873, |
|
"learning_rate": 2.397595611964874e-05, |
|
"loss": 0.1124, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 1.0165066382171646, |
|
"eval_loss": 0.019898999482393265, |
|
"eval_runtime": 81.6329, |
|
"eval_samples_per_second": 83.79, |
|
"eval_steps_per_second": 10.474, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 23.17083587553386, |
|
"grad_norm": 3.757054328918457, |
|
"learning_rate": 2.302011238680703e-05, |
|
"loss": 0.1079, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 23.780964002440513, |
|
"grad_norm": 6.899264812469482, |
|
"learning_rate": 2.2014218885552525e-05, |
|
"loss": 0.104, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 1.015054528212423, |
|
"eval_loss": 0.01506368163973093, |
|
"eval_runtime": 72.1117, |
|
"eval_samples_per_second": 94.853, |
|
"eval_steps_per_second": 11.857, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 19680 |
|
}, |
|
{ |
|
"epoch": 24.390482001220256, |
|
"grad_norm": 1.1912193298339844, |
|
"learning_rate": 2.096618265844089e-05, |
|
"loss": 0.1025, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 2.540283679962158, |
|
"learning_rate": 1.988230050118496e-05, |
|
"loss": 0.1039, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 1.0149359886201992, |
|
"eval_loss": 0.013627970591187477, |
|
"eval_runtime": 73.5874, |
|
"eval_samples_per_second": 92.951, |
|
"eval_steps_per_second": 11.619, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 25.610128126906652, |
|
"grad_norm": 1.817094326019287, |
|
"learning_rate": 1.876908457848333e-05, |
|
"loss": 0.094, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 1.0147878141299194, |
|
"eval_loss": 0.013174526393413544, |
|
"eval_runtime": 78.6671, |
|
"eval_samples_per_second": 86.949, |
|
"eval_steps_per_second": 10.869, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 21320 |
|
}, |
|
{ |
|
"epoch": 26.219646125686396, |
|
"grad_norm": 1.6206278800964355, |
|
"learning_rate": 1.7633223297728993e-05, |
|
"loss": 0.0919, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 26.829774252593044, |
|
"grad_norm": 0.5614987015724182, |
|
"learning_rate": 1.6481541123819273e-05, |
|
"loss": 0.0921, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 1.0157954006638217, |
|
"eval_loss": 0.017084894701838493, |
|
"eval_runtime": 76.9298, |
|
"eval_samples_per_second": 88.912, |
|
"eval_steps_per_second": 11.114, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 22140 |
|
}, |
|
{ |
|
"epoch": 27.439292251372787, |
|
"grad_norm": 1.8238394260406494, |
|
"learning_rate": 1.532095757650705e-05, |
|
"loss": 0.0832, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 1.0146396396396395, |
|
"eval_loss": 0.012186683714389801, |
|
"eval_runtime": 72.328, |
|
"eval_samples_per_second": 94.569, |
|
"eval_steps_per_second": 11.821, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 22960 |
|
}, |
|
{ |
|
"epoch": 28.04881025015253, |
|
"grad_norm": 3.1914637088775635, |
|
"learning_rate": 1.4160767960306099e-05, |
|
"loss": 0.0899, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 28.658938377059183, |
|
"grad_norm": 1.6626567840576172, |
|
"learning_rate": 1.300329518493389e-05, |
|
"loss": 0.08, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 1.015054528212423, |
|
"eval_loss": 0.014703178778290749, |
|
"eval_runtime": 69.3866, |
|
"eval_samples_per_second": 98.578, |
|
"eval_steps_per_second": 12.322, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 23780 |
|
}, |
|
{ |
|
"epoch": 29.268456375838927, |
|
"grad_norm": 1.1348814964294434, |
|
"learning_rate": 1.1857818981811845e-05, |
|
"loss": 0.0822, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 29.878584502745575, |
|
"grad_norm": 0.9157238602638245, |
|
"learning_rate": 1.0731221584071209e-05, |
|
"loss": 0.0797, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 1.0146692745376955, |
|
"eval_loss": 0.01217756699770689, |
|
"eval_runtime": 70.4102, |
|
"eval_samples_per_second": 97.145, |
|
"eval_steps_per_second": 12.143, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 30.48810250152532, |
|
"grad_norm": 2.62127947807312, |
|
"learning_rate": 9.630271797484814e-06, |
|
"loss": 0.0775, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 1.015054528212423, |
|
"eval_loss": 0.013129099272191525, |
|
"eval_runtime": 69.5638, |
|
"eval_samples_per_second": 98.327, |
|
"eval_steps_per_second": 12.291, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 31.097620500305062, |
|
"grad_norm": 1.8141522407531738, |
|
"learning_rate": 8.561584332228596e-06, |
|
"loss": 0.071, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 31.707748627211714, |
|
"grad_norm": 0.32249507308006287, |
|
"learning_rate": 7.533597307465705e-06, |
|
"loss": 0.0675, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 1.0148174490279753, |
|
"eval_loss": 0.01305685006082058, |
|
"eval_runtime": 70.307, |
|
"eval_samples_per_second": 97.288, |
|
"eval_steps_per_second": 12.161, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 26240 |
|
}, |
|
{ |
|
"epoch": 32.31726662599146, |
|
"grad_norm": 0.06660401076078415, |
|
"learning_rate": 6.550290643366546e-06, |
|
"loss": 0.07, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 32.92739475289811, |
|
"grad_norm": 0.08903522789478302, |
|
"learning_rate": 5.617548632301114e-06, |
|
"loss": 0.0676, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 1.0146100047415836, |
|
"eval_loss": 0.017076797783374786, |
|
"eval_runtime": 71.0362, |
|
"eval_samples_per_second": 96.289, |
|
"eval_steps_per_second": 12.036, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 27060 |
|
}, |
|
{ |
|
"epoch": 33.53691275167785, |
|
"grad_norm": 4.330328464508057, |
|
"learning_rate": 4.739256219207167e-06, |
|
"loss": 0.069, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 1.0145803698435278, |
|
"eval_loss": 0.014736202545464039, |
|
"eval_runtime": 73.5978, |
|
"eval_samples_per_second": 92.938, |
|
"eval_steps_per_second": 11.617, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 27880 |
|
}, |
|
{ |
|
"epoch": 34.1464307504576, |
|
"grad_norm": 0.1510840207338333, |
|
"learning_rate": 3.922612254686006e-06, |
|
"loss": 0.0629, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 34.75655887736425, |
|
"grad_norm": 0.51495361328125, |
|
"learning_rate": 3.1725232868909293e-06, |
|
"loss": 0.0588, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 1.0146396396396395, |
|
"eval_loss": 0.014013656415045261, |
|
"eval_runtime": 73.9594, |
|
"eval_samples_per_second": 92.483, |
|
"eval_steps_per_second": 11.56, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 35.36607687614399, |
|
"grad_norm": 2.207364559173584, |
|
"learning_rate": 2.493495989231198e-06, |
|
"loss": 0.0623, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 35.97620500305064, |
|
"grad_norm": 1.6189491748809814, |
|
"learning_rate": 1.8896100834437107e-06, |
|
"loss": 0.0608, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 1.0145803698435278, |
|
"eval_loss": 0.015181286260485649, |
|
"eval_runtime": 73.2042, |
|
"eval_samples_per_second": 93.437, |
|
"eval_steps_per_second": 11.68, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 29520 |
|
}, |
|
{ |
|
"epoch": 36.58572300183038, |
|
"grad_norm": 3.607675790786743, |
|
"learning_rate": 1.3644938278693997e-06, |
|
"loss": 0.0598, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 1.0146396396396395, |
|
"eval_loss": 0.015759674832224846, |
|
"eval_runtime": 73.8898, |
|
"eval_samples_per_second": 92.57, |
|
"eval_steps_per_second": 11.571, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 30340 |
|
}, |
|
{ |
|
"epoch": 37.195241000610125, |
|
"grad_norm": 1.4858845472335815, |
|
"learning_rate": 9.213022182052699e-07, |
|
"loss": 0.0579, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 37.80536912751678, |
|
"grad_norm": 0.11195004731416702, |
|
"learning_rate": 5.626980317060648e-07, |
|
"loss": 0.0578, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 1.0146989094357515, |
|
"eval_loss": 0.014966564252972603, |
|
"eval_runtime": 73.4635, |
|
"eval_samples_per_second": 93.107, |
|
"eval_steps_per_second": 11.638, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 31160 |
|
}, |
|
{ |
|
"epoch": 38.41488712629652, |
|
"grad_norm": 1.9088082313537598, |
|
"learning_rate": 2.912918111057888e-07, |
|
"loss": 0.0563, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 1.0147285443338074, |
|
"eval_loss": 0.015355916693806648, |
|
"eval_runtime": 63.935, |
|
"eval_samples_per_second": 106.984, |
|
"eval_steps_per_second": 13.373, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 31980 |
|
}, |
|
{ |
|
"epoch": 39.024405125076264, |
|
"grad_norm": 3.6777658462524414, |
|
"learning_rate": 1.0762696080869105e-07, |
|
"loss": 0.059, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 39.634533251982916, |
|
"grad_norm": 1.0012741088867188, |
|
"learning_rate": 1.3438245287707985e-08, |
|
"loss": 0.0591, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 39.95179987797437, |
|
"eval_cer": 1.0146692745376955, |
|
"eval_loss": 0.014642004854977131, |
|
"eval_runtime": 66.4954, |
|
"eval_samples_per_second": 102.864, |
|
"eval_steps_per_second": 12.858, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 32760 |
|
}, |
|
{ |
|
"epoch": 39.95179987797437, |
|
"step": 32760, |
|
"total_flos": 1.7574609702583173e+19, |
|
"train_loss": 1.277195220610743, |
|
"train_runtime": 29323.9298, |
|
"train_samples_per_second": 71.532, |
|
"train_steps_per_second": 1.117 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7574609702583173e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|