|
{ |
|
"best_metric": 0.6322699785232544, |
|
"best_model_checkpoint": "./Hubert-common_voice-ja-demo-kana-only-cosine/checkpoint-9400", |
|
"epoch": 25.0, |
|
"eval_steps": 100, |
|
"global_step": 9400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"eval_cer": 5.820971309556485, |
|
"eval_loss": 43.225128173828125, |
|
"eval_runtime": 207.1142, |
|
"eval_samples_per_second": 23.953, |
|
"eval_steps_per_second": 2.998, |
|
"eval_wer": 1.5295303366256803, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"eval_cer": 5.284926618983806, |
|
"eval_loss": 42.44519805908203, |
|
"eval_runtime": 204.7199, |
|
"eval_samples_per_second": 24.233, |
|
"eval_steps_per_second": 3.033, |
|
"eval_wer": 1.5321507760532151, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"eval_cer": 1.8125998998982837, |
|
"eval_loss": 40.415428161621094, |
|
"eval_runtime": 201.5553, |
|
"eval_samples_per_second": 24.614, |
|
"eval_steps_per_second": 3.081, |
|
"eval_wer": 1.126184237048982, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 32.8021240234375, |
|
"eval_runtime": 198.6844, |
|
"eval_samples_per_second": 24.969, |
|
"eval_steps_per_second": 3.126, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"grad_norm": 110.69781494140625, |
|
"learning_rate": 1.188e-06, |
|
"loss": 31.884, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 20.813316345214844, |
|
"eval_runtime": 217.9621, |
|
"eval_samples_per_second": 22.761, |
|
"eval_steps_per_second": 2.849, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 17.582351684570312, |
|
"eval_runtime": 200.2418, |
|
"eval_samples_per_second": 24.775, |
|
"eval_steps_per_second": 3.101, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8617021276595744, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 16.86818504333496, |
|
"eval_runtime": 200.8491, |
|
"eval_samples_per_second": 24.7, |
|
"eval_steps_per_second": 3.092, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 16.446561813354492, |
|
"eval_runtime": 206.3537, |
|
"eval_samples_per_second": 24.041, |
|
"eval_steps_per_second": 3.009, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.393617021276596, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 16.003677368164062, |
|
"eval_runtime": 203.6688, |
|
"eval_samples_per_second": 24.358, |
|
"eval_steps_per_second": 3.049, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 112.62248229980469, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 14.4701, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 15.540871620178223, |
|
"eval_runtime": 205.8626, |
|
"eval_samples_per_second": 24.099, |
|
"eval_steps_per_second": 3.017, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.925531914893617, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 15.044602394104004, |
|
"eval_runtime": 203.7066, |
|
"eval_samples_per_second": 24.354, |
|
"eval_steps_per_second": 3.049, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 14.502347946166992, |
|
"eval_runtime": 203.5102, |
|
"eval_samples_per_second": 24.377, |
|
"eval_steps_per_second": 3.051, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.4574468085106385, |
|
"eval_cer": 0.9999031273713612, |
|
"eval_loss": 13.929794311523438, |
|
"eval_runtime": 205.0004, |
|
"eval_samples_per_second": 24.2, |
|
"eval_steps_per_second": 3.029, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 13.321216583251953, |
|
"eval_runtime": 206.9747, |
|
"eval_samples_per_second": 23.969, |
|
"eval_steps_per_second": 3.0, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"grad_norm": 89.55043029785156, |
|
"learning_rate": 3.588e-06, |
|
"loss": 12.1626, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 12.681354522705078, |
|
"eval_runtime": 204.7436, |
|
"eval_samples_per_second": 24.23, |
|
"eval_steps_per_second": 3.033, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 12.00993824005127, |
|
"eval_runtime": 205.3753, |
|
"eval_samples_per_second": 24.156, |
|
"eval_steps_per_second": 3.024, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.5212765957446805, |
|
"eval_cer": 0.9999031273713612, |
|
"eval_loss": 11.317916870117188, |
|
"eval_runtime": 208.5747, |
|
"eval_samples_per_second": 23.785, |
|
"eval_steps_per_second": 2.977, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 10.601661682128906, |
|
"eval_runtime": 205.7117, |
|
"eval_samples_per_second": 24.116, |
|
"eval_steps_per_second": 3.019, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.053191489361702, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 9.880986213684082, |
|
"eval_runtime": 206.3181, |
|
"eval_samples_per_second": 24.045, |
|
"eval_steps_per_second": 3.01, |
|
"eval_wer": 1.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 55.87970733642578, |
|
"learning_rate": 4.788e-06, |
|
"loss": 9.5127, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_cer": 0.9999031273713612, |
|
"eval_loss": 9.15669059753418, |
|
"eval_runtime": 205.9181, |
|
"eval_samples_per_second": 24.092, |
|
"eval_steps_per_second": 3.016, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.585106382978723, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 8.444498062133789, |
|
"eval_runtime": 207.3299, |
|
"eval_samples_per_second": 23.928, |
|
"eval_steps_per_second": 2.995, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"eval_cer": 0.9999031273713612, |
|
"eval_loss": 7.757309913635254, |
|
"eval_runtime": 207.2514, |
|
"eval_samples_per_second": 23.937, |
|
"eval_steps_per_second": 2.996, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.117021276595745, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 7.104926109313965, |
|
"eval_runtime": 207.5258, |
|
"eval_samples_per_second": 23.905, |
|
"eval_steps_per_second": 2.992, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 6.5016021728515625, |
|
"eval_runtime": 207.6536, |
|
"eval_samples_per_second": 23.891, |
|
"eval_steps_per_second": 2.991, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"grad_norm": 40.37785339355469, |
|
"learning_rate": 5.988e-06, |
|
"loss": 6.6873, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 5.956511497497559, |
|
"eval_runtime": 208.8491, |
|
"eval_samples_per_second": 23.754, |
|
"eval_steps_per_second": 2.973, |
|
"eval_wer": 1.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 5.485317707061768, |
|
"eval_runtime": 208.251, |
|
"eval_samples_per_second": 23.822, |
|
"eval_steps_per_second": 2.982, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.180851063829787, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 5.099748134613037, |
|
"eval_runtime": 208.5061, |
|
"eval_samples_per_second": 23.793, |
|
"eval_steps_per_second": 2.978, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.788908958435059, |
|
"eval_runtime": 208.0405, |
|
"eval_samples_per_second": 23.846, |
|
"eval_steps_per_second": 2.985, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.712765957446808, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.557297706604004, |
|
"eval_runtime": 206.7999, |
|
"eval_samples_per_second": 23.989, |
|
"eval_steps_per_second": 3.003, |
|
"eval_wer": 1.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"grad_norm": 9.712457656860352, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 4.7448, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.38894510269165, |
|
"eval_runtime": 208.3141, |
|
"eval_samples_per_second": 23.815, |
|
"eval_steps_per_second": 2.981, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.24468085106383, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.261370658874512, |
|
"eval_runtime": 206.872, |
|
"eval_samples_per_second": 23.981, |
|
"eval_steps_per_second": 3.002, |
|
"eval_wer": 1.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.196043014526367, |
|
"eval_runtime": 206.4767, |
|
"eval_samples_per_second": 24.027, |
|
"eval_steps_per_second": 3.008, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.77659574468085, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.139795303344727, |
|
"eval_runtime": 206.4082, |
|
"eval_samples_per_second": 24.035, |
|
"eval_steps_per_second": 3.009, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.042553191489361, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.1092000007629395, |
|
"eval_runtime": 205.1387, |
|
"eval_samples_per_second": 24.184, |
|
"eval_steps_per_second": 3.027, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"grad_norm": 1.407253384590149, |
|
"learning_rate": 8.388e-06, |
|
"loss": 4.1253, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.091124534606934, |
|
"eval_runtime": 206.2573, |
|
"eval_samples_per_second": 24.052, |
|
"eval_steps_per_second": 3.011, |
|
"eval_wer": 1.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.085117816925049, |
|
"eval_runtime": 205.8565, |
|
"eval_samples_per_second": 24.099, |
|
"eval_steps_per_second": 3.017, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.840425531914894, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.070712089538574, |
|
"eval_runtime": 206.7954, |
|
"eval_samples_per_second": 23.99, |
|
"eval_steps_per_second": 3.003, |
|
"eval_wer": 1.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.106382978723405, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.062964916229248, |
|
"eval_runtime": 206.5725, |
|
"eval_samples_per_second": 24.016, |
|
"eval_steps_per_second": 3.006, |
|
"eval_wer": 1.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.372340425531915, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.058863639831543, |
|
"eval_runtime": 206.6395, |
|
"eval_samples_per_second": 24.008, |
|
"eval_steps_per_second": 3.005, |
|
"eval_wer": 1.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 0.9817385077476501, |
|
"learning_rate": 9.588e-06, |
|
"loss": 4.0399, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.0573530197143555, |
|
"eval_runtime": 207.8093, |
|
"eval_samples_per_second": 23.873, |
|
"eval_steps_per_second": 2.988, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.904255319148936, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.049480438232422, |
|
"eval_runtime": 207.6873, |
|
"eval_samples_per_second": 23.887, |
|
"eval_steps_per_second": 2.99, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.170212765957446, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.03674840927124, |
|
"eval_runtime": 207.2045, |
|
"eval_samples_per_second": 23.943, |
|
"eval_steps_per_second": 2.997, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.436170212765958, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.0297136306762695, |
|
"eval_runtime": 207.9114, |
|
"eval_samples_per_second": 23.861, |
|
"eval_steps_per_second": 2.987, |
|
"eval_wer": 1.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.702127659574469, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.01682186126709, |
|
"eval_runtime": 214.2137, |
|
"eval_samples_per_second": 23.159, |
|
"eval_steps_per_second": 2.899, |
|
"eval_wer": 1.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"grad_norm": 1.0635989904403687, |
|
"learning_rate": 1.0787999999999999e-05, |
|
"loss": 4.0102, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"eval_cer": 0.9999031273713612, |
|
"eval_loss": 4.000179290771484, |
|
"eval_runtime": 208.8628, |
|
"eval_samples_per_second": 23.752, |
|
"eval_steps_per_second": 2.973, |
|
"eval_wer": 1.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.23404255319149, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 3.9823410511016846, |
|
"eval_runtime": 207.9568, |
|
"eval_samples_per_second": 23.856, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 1.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 3.94744873046875, |
|
"eval_runtime": 207.1171, |
|
"eval_samples_per_second": 23.953, |
|
"eval_steps_per_second": 2.998, |
|
"eval_wer": 1.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 3.88703989982605, |
|
"eval_runtime": 207.9782, |
|
"eval_samples_per_second": 23.853, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.03191489361702, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 3.7933156490325928, |
|
"eval_runtime": 207.9844, |
|
"eval_samples_per_second": 23.853, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 1.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"grad_norm": 2.227720260620117, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 3.8616, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 3.6575610637664795, |
|
"eval_runtime": 207.9134, |
|
"eval_samples_per_second": 23.861, |
|
"eval_steps_per_second": 2.987, |
|
"eval_wer": 1.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.563829787234042, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 3.4925012588500977, |
|
"eval_runtime": 208.4794, |
|
"eval_samples_per_second": 23.796, |
|
"eval_steps_per_second": 2.979, |
|
"eval_wer": 1.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.829787234042554, |
|
"eval_cer": 0.9999031273713612, |
|
"eval_loss": 3.2550227642059326, |
|
"eval_runtime": 207.4858, |
|
"eval_samples_per_second": 23.91, |
|
"eval_steps_per_second": 2.993, |
|
"eval_wer": 1.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.095744680851064, |
|
"eval_cer": 0.8301338456819026, |
|
"eval_loss": 2.8836495876312256, |
|
"eval_runtime": 208.4061, |
|
"eval_samples_per_second": 23.804, |
|
"eval_steps_per_second": 2.98, |
|
"eval_wer": 1.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.361702127659575, |
|
"eval_cer": 0.6170624989909101, |
|
"eval_loss": 2.521071672439575, |
|
"eval_runtime": 209.1476, |
|
"eval_samples_per_second": 23.72, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 1.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"grad_norm": 8.615595817565918, |
|
"learning_rate": 1.3188e-05, |
|
"loss": 3.023, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"eval_cer": 0.5481053328382066, |
|
"eval_loss": 2.2902443408966064, |
|
"eval_runtime": 208.2855, |
|
"eval_samples_per_second": 23.818, |
|
"eval_steps_per_second": 2.981, |
|
"eval_wer": 1.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"eval_cer": 0.5078628283578475, |
|
"eval_loss": 2.1006453037261963, |
|
"eval_runtime": 208.3604, |
|
"eval_samples_per_second": 23.81, |
|
"eval_steps_per_second": 2.98, |
|
"eval_wer": 1.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 15.159574468085106, |
|
"eval_cer": 0.47841354925165896, |
|
"eval_loss": 1.9463908672332764, |
|
"eval_runtime": 208.5724, |
|
"eval_samples_per_second": 23.786, |
|
"eval_steps_per_second": 2.977, |
|
"eval_wer": 1.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 15.425531914893616, |
|
"eval_cer": 0.45966869561005536, |
|
"eval_loss": 1.8196361064910889, |
|
"eval_runtime": 206.9409, |
|
"eval_samples_per_second": 23.973, |
|
"eval_steps_per_second": 3.001, |
|
"eval_wer": 1.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 15.691489361702128, |
|
"eval_cer": 0.4237531685422284, |
|
"eval_loss": 1.6974730491638184, |
|
"eval_runtime": 206.9424, |
|
"eval_samples_per_second": 23.973, |
|
"eval_steps_per_second": 3.001, |
|
"eval_wer": 1.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"grad_norm": 11.626580238342285, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 1.9348, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"eval_cer": 0.4093110741559972, |
|
"eval_loss": 1.6040183305740356, |
|
"eval_runtime": 205.6257, |
|
"eval_samples_per_second": 24.126, |
|
"eval_steps_per_second": 3.02, |
|
"eval_wer": 1.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.22340425531915, |
|
"eval_cer": 0.40211828147956796, |
|
"eval_loss": 1.5034863948822021, |
|
"eval_runtime": 206.5176, |
|
"eval_samples_per_second": 24.022, |
|
"eval_steps_per_second": 3.007, |
|
"eval_wer": 1.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 16.48936170212766, |
|
"eval_cer": 0.3929638180732034, |
|
"eval_loss": 1.42111337184906, |
|
"eval_runtime": 206.0968, |
|
"eval_samples_per_second": 24.071, |
|
"eval_steps_per_second": 3.013, |
|
"eval_wer": 1.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 16.75531914893617, |
|
"eval_cer": 0.3802250674072041, |
|
"eval_loss": 1.3529201745986938, |
|
"eval_runtime": 207.5275, |
|
"eval_samples_per_second": 23.905, |
|
"eval_steps_per_second": 2.992, |
|
"eval_wer": 1.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"eval_cer": 0.3790545231444855, |
|
"eval_loss": 1.2795100212097168, |
|
"eval_runtime": 210.6768, |
|
"eval_samples_per_second": 23.548, |
|
"eval_steps_per_second": 2.948, |
|
"eval_wer": 1.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"grad_norm": 8.535591125488281, |
|
"learning_rate": 1.5588e-05, |
|
"loss": 1.4128, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"eval_cer": 0.3710706040008396, |
|
"eval_loss": 1.219308614730835, |
|
"eval_runtime": 207.1346, |
|
"eval_samples_per_second": 23.951, |
|
"eval_steps_per_second": 2.998, |
|
"eval_wer": 1.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.5531914893617, |
|
"eval_cer": 0.3673571532363531, |
|
"eval_loss": 1.1645617485046387, |
|
"eval_runtime": 207.4547, |
|
"eval_samples_per_second": 23.914, |
|
"eval_steps_per_second": 2.993, |
|
"eval_wer": 1.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 17.819148936170212, |
|
"eval_cer": 0.37062660445291185, |
|
"eval_loss": 1.1192774772644043, |
|
"eval_runtime": 209.6296, |
|
"eval_samples_per_second": 23.666, |
|
"eval_steps_per_second": 2.962, |
|
"eval_wer": 1.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 18.085106382978722, |
|
"eval_cer": 0.3605841419506918, |
|
"eval_loss": 1.066541075706482, |
|
"eval_runtime": 212.2728, |
|
"eval_samples_per_second": 23.371, |
|
"eval_steps_per_second": 2.925, |
|
"eval_wer": 1.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 18.351063829787233, |
|
"eval_cer": 0.3590422526115246, |
|
"eval_loss": 1.0244266986846924, |
|
"eval_runtime": 211.5467, |
|
"eval_samples_per_second": 23.451, |
|
"eval_steps_per_second": 2.936, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"grad_norm": 6.8628058433532715, |
|
"learning_rate": 1.6788e-05, |
|
"loss": 1.1012, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"eval_cer": 0.35400487592230817, |
|
"eval_loss": 0.9863778352737427, |
|
"eval_runtime": 210.3223, |
|
"eval_samples_per_second": 23.588, |
|
"eval_steps_per_second": 2.953, |
|
"eval_wer": 1.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.882978723404257, |
|
"eval_cer": 0.3553691654423043, |
|
"eval_loss": 0.9577982425689697, |
|
"eval_runtime": 208.8403, |
|
"eval_samples_per_second": 23.755, |
|
"eval_steps_per_second": 2.974, |
|
"eval_wer": 1.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 19.148936170212767, |
|
"eval_cer": 0.350896879086814, |
|
"eval_loss": 0.9308760762214661, |
|
"eval_runtime": 211.4474, |
|
"eval_samples_per_second": 23.462, |
|
"eval_steps_per_second": 2.937, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 19.414893617021278, |
|
"eval_cer": 0.3495083714096582, |
|
"eval_loss": 0.9070402383804321, |
|
"eval_runtime": 213.7352, |
|
"eval_samples_per_second": 23.211, |
|
"eval_steps_per_second": 2.905, |
|
"eval_wer": 1.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 19.680851063829788, |
|
"eval_cer": 0.3470461920984226, |
|
"eval_loss": 0.8692798018455505, |
|
"eval_runtime": 203.6485, |
|
"eval_samples_per_second": 24.361, |
|
"eval_steps_per_second": 3.049, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"grad_norm": 5.451691150665283, |
|
"learning_rate": 1.7988e-05, |
|
"loss": 0.9083, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"eval_cer": 0.34493921242552916, |
|
"eval_loss": 0.8492410778999329, |
|
"eval_runtime": 202.6575, |
|
"eval_samples_per_second": 24.48, |
|
"eval_steps_per_second": 3.064, |
|
"eval_wer": 1.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.21276595744681, |
|
"eval_cer": 0.34490692154931624, |
|
"eval_loss": 0.8214272856712341, |
|
"eval_runtime": 204.0849, |
|
"eval_samples_per_second": 24.309, |
|
"eval_steps_per_second": 3.043, |
|
"eval_wer": 1.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 20.47872340425532, |
|
"eval_cer": 0.35004117086717146, |
|
"eval_loss": 0.821138322353363, |
|
"eval_runtime": 205.8454, |
|
"eval_samples_per_second": 24.101, |
|
"eval_steps_per_second": 3.017, |
|
"eval_wer": 1.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 20.74468085106383, |
|
"eval_cer": 0.34516524855901964, |
|
"eval_loss": 0.7964152693748474, |
|
"eval_runtime": 205.6355, |
|
"eval_samples_per_second": 24.125, |
|
"eval_steps_per_second": 3.02, |
|
"eval_wer": 1.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 21.01063829787234, |
|
"eval_cer": 0.34293717810032776, |
|
"eval_loss": 0.77970951795578, |
|
"eval_runtime": 207.2008, |
|
"eval_samples_per_second": 23.943, |
|
"eval_steps_per_second": 2.997, |
|
"eval_wer": 1.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"grad_norm": 5.198563098907471, |
|
"learning_rate": 1.9188e-05, |
|
"loss": 0.7546, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"eval_cer": 0.34003907196021765, |
|
"eval_loss": 0.7633516788482666, |
|
"eval_runtime": 205.0619, |
|
"eval_samples_per_second": 24.193, |
|
"eval_steps_per_second": 3.028, |
|
"eval_wer": 1.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.54255319148936, |
|
"eval_cer": 0.33842452814957136, |
|
"eval_loss": 0.7471381425857544, |
|
"eval_runtime": 211.6646, |
|
"eval_samples_per_second": 23.438, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 1.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 21.80851063829787, |
|
"eval_cer": 0.33775449246815314, |
|
"eval_loss": 0.7400262355804443, |
|
"eval_runtime": 213.7944, |
|
"eval_samples_per_second": 23.205, |
|
"eval_steps_per_second": 2.905, |
|
"eval_wer": 1.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 22.074468085106382, |
|
"eval_cer": 0.33898961848329756, |
|
"eval_loss": 0.7214083075523376, |
|
"eval_runtime": 205.5478, |
|
"eval_samples_per_second": 24.135, |
|
"eval_steps_per_second": 3.021, |
|
"eval_wer": 1.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 22.340425531914892, |
|
"eval_cer": 0.33750423817750297, |
|
"eval_loss": 0.7061555981636047, |
|
"eval_runtime": 211.7804, |
|
"eval_samples_per_second": 23.425, |
|
"eval_steps_per_second": 2.932, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"grad_norm": 6.07712459564209, |
|
"learning_rate": 2.0388e-05, |
|
"loss": 0.651, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"eval_cer": 0.33436395046579587, |
|
"eval_loss": 0.6972677111625671, |
|
"eval_runtime": 204.7478, |
|
"eval_samples_per_second": 24.23, |
|
"eval_steps_per_second": 3.033, |
|
"eval_wer": 1.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.872340425531917, |
|
"eval_cer": 0.33436395046579587, |
|
"eval_loss": 0.6929803490638733, |
|
"eval_runtime": 205.6125, |
|
"eval_samples_per_second": 24.128, |
|
"eval_steps_per_second": 3.02, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 23.138297872340427, |
|
"eval_cer": 0.33500169527100115, |
|
"eval_loss": 0.6829419136047363, |
|
"eval_runtime": 207.2537, |
|
"eval_samples_per_second": 23.937, |
|
"eval_steps_per_second": 2.996, |
|
"eval_wer": 1.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 23.404255319148938, |
|
"eval_cer": 0.3331691880459176, |
|
"eval_loss": 0.6683320999145508, |
|
"eval_runtime": 206.9839, |
|
"eval_samples_per_second": 23.968, |
|
"eval_steps_per_second": 3.0, |
|
"eval_wer": 1.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 23.670212765957448, |
|
"eval_cer": 0.33223275263574276, |
|
"eval_loss": 0.6596328616142273, |
|
"eval_runtime": 216.6308, |
|
"eval_samples_per_second": 22.901, |
|
"eval_steps_per_second": 2.867, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"grad_norm": 2.6311779022216797, |
|
"learning_rate": 2.1588e-05, |
|
"loss": 0.5868, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"eval_cer": 0.3320712982546781, |
|
"eval_loss": 0.6764070987701416, |
|
"eval_runtime": 207.8654, |
|
"eval_samples_per_second": 23.866, |
|
"eval_steps_per_second": 2.988, |
|
"eval_wer": 1.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 24.20212765957447, |
|
"eval_cer": 0.3308280995204805, |
|
"eval_loss": 0.6634973883628845, |
|
"eval_runtime": 207.5025, |
|
"eval_samples_per_second": 23.908, |
|
"eval_steps_per_second": 2.993, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 24.46808510638298, |
|
"eval_cer": 0.3323699888596477, |
|
"eval_loss": 0.6560051441192627, |
|
"eval_runtime": 207.3198, |
|
"eval_samples_per_second": 23.929, |
|
"eval_steps_per_second": 2.995, |
|
"eval_wer": 1.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 24.73404255319149, |
|
"eval_cer": 0.3290198104525566, |
|
"eval_loss": 0.6412243843078613, |
|
"eval_runtime": 207.1443, |
|
"eval_samples_per_second": 23.949, |
|
"eval_steps_per_second": 2.998, |
|
"eval_wer": 1.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.33074737232994816, |
|
"eval_loss": 0.6322699785232544, |
|
"eval_runtime": 205.6853, |
|
"eval_samples_per_second": 24.119, |
|
"eval_steps_per_second": 3.019, |
|
"eval_wer": 1.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 9400, |
|
"total_flos": 1.314337503834538e+19, |
|
"train_loss": 5.653992247885846, |
|
"train_runtime": 50640.2984, |
|
"train_samples_per_second": 5.94, |
|
"train_steps_per_second": 0.186 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.314337503834538e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|