{ "best_metric": 0.6322699785232544, "best_model_checkpoint": "./Hubert-common_voice-ja-demo-kana-only-cosine/checkpoint-9400", "epoch": 25.0, "eval_steps": 100, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26595744680851063, "eval_cer": 5.820971309556485, "eval_loss": 43.225128173828125, "eval_runtime": 207.1142, "eval_samples_per_second": 23.953, "eval_steps_per_second": 2.998, "eval_wer": 1.5295303366256803, "step": 100 }, { "epoch": 0.5319148936170213, "eval_cer": 5.284926618983806, "eval_loss": 42.44519805908203, "eval_runtime": 204.7199, "eval_samples_per_second": 24.233, "eval_steps_per_second": 3.033, "eval_wer": 1.5321507760532151, "step": 200 }, { "epoch": 0.7978723404255319, "eval_cer": 1.8125998998982837, "eval_loss": 40.415428161621094, "eval_runtime": 201.5553, "eval_samples_per_second": 24.614, "eval_steps_per_second": 3.081, "eval_wer": 1.126184237048982, "step": 300 }, { "epoch": 1.0638297872340425, "eval_cer": 0.9998627637760951, "eval_loss": 32.8021240234375, "eval_runtime": 198.6844, "eval_samples_per_second": 24.969, "eval_steps_per_second": 3.126, "eval_wer": 1.0, "step": 400 }, { "epoch": 1.3297872340425532, "grad_norm": 110.69781494140625, "learning_rate": 1.188e-06, "loss": 31.884, "step": 500 }, { "epoch": 1.3297872340425532, "eval_cer": 0.9998627637760951, "eval_loss": 20.813316345214844, "eval_runtime": 217.9621, "eval_samples_per_second": 22.761, "eval_steps_per_second": 2.849, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.5957446808510638, "eval_cer": 0.9998627637760951, "eval_loss": 17.582351684570312, "eval_runtime": 200.2418, "eval_samples_per_second": 24.775, "eval_steps_per_second": 3.101, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.8617021276595744, "eval_cer": 0.9998627637760951, "eval_loss": 16.86818504333496, "eval_runtime": 200.8491, "eval_samples_per_second": 24.7, "eval_steps_per_second": 3.092, "eval_wer": 1.0, "step": 700 }, { "epoch": 2.127659574468085, "eval_cer": 0.9998627637760951, "eval_loss": 16.446561813354492, "eval_runtime": 206.3537, "eval_samples_per_second": 24.041, "eval_steps_per_second": 3.009, "eval_wer": 1.0, "step": 800 }, { "epoch": 2.393617021276596, "eval_cer": 0.9998627637760951, "eval_loss": 16.003677368164062, "eval_runtime": 203.6688, "eval_samples_per_second": 24.358, "eval_steps_per_second": 3.049, "eval_wer": 1.0, "step": 900 }, { "epoch": 2.6595744680851063, "grad_norm": 112.62248229980469, "learning_rate": 2.3880000000000003e-06, "loss": 14.4701, "step": 1000 }, { "epoch": 2.6595744680851063, "eval_cer": 0.9998627637760951, "eval_loss": 15.540871620178223, "eval_runtime": 205.8626, "eval_samples_per_second": 24.099, "eval_steps_per_second": 3.017, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.925531914893617, "eval_cer": 0.9998627637760951, "eval_loss": 15.044602394104004, "eval_runtime": 203.7066, "eval_samples_per_second": 24.354, "eval_steps_per_second": 3.049, "eval_wer": 1.0, "step": 1100 }, { "epoch": 3.1914893617021276, "eval_cer": 0.9998627637760951, "eval_loss": 14.502347946166992, "eval_runtime": 203.5102, "eval_samples_per_second": 24.377, "eval_steps_per_second": 3.051, "eval_wer": 1.0, "step": 1200 }, { "epoch": 3.4574468085106385, "eval_cer": 0.9999031273713612, "eval_loss": 13.929794311523438, "eval_runtime": 205.0004, "eval_samples_per_second": 24.2, "eval_steps_per_second": 3.029, "eval_wer": 1.0, "step": 1300 }, { "epoch": 3.723404255319149, "eval_cer": 0.9998627637760951, "eval_loss": 13.321216583251953, "eval_runtime": 206.9747, "eval_samples_per_second": 23.969, "eval_steps_per_second": 3.0, "eval_wer": 1.0, "step": 1400 }, { "epoch": 3.9893617021276597, "grad_norm": 89.55043029785156, "learning_rate": 3.588e-06, "loss": 12.1626, "step": 1500 }, { "epoch": 3.9893617021276597, "eval_cer": 0.9998627637760951, "eval_loss": 12.681354522705078, "eval_runtime": 204.7436, "eval_samples_per_second": 24.23, "eval_steps_per_second": 3.033, "eval_wer": 1.0, "step": 1500 }, { "epoch": 4.25531914893617, "eval_cer": 0.9998627637760951, "eval_loss": 12.00993824005127, "eval_runtime": 205.3753, "eval_samples_per_second": 24.156, "eval_steps_per_second": 3.024, "eval_wer": 1.0, "step": 1600 }, { "epoch": 4.5212765957446805, "eval_cer": 0.9999031273713612, "eval_loss": 11.317916870117188, "eval_runtime": 208.5747, "eval_samples_per_second": 23.785, "eval_steps_per_second": 2.977, "eval_wer": 1.0, "step": 1700 }, { "epoch": 4.787234042553192, "eval_cer": 0.9998627637760951, "eval_loss": 10.601661682128906, "eval_runtime": 205.7117, "eval_samples_per_second": 24.116, "eval_steps_per_second": 3.019, "eval_wer": 1.0, "step": 1800 }, { "epoch": 5.053191489361702, "eval_cer": 0.9998627637760951, "eval_loss": 9.880986213684082, "eval_runtime": 206.3181, "eval_samples_per_second": 24.045, "eval_steps_per_second": 3.01, "eval_wer": 1.0, "step": 1900 }, { "epoch": 5.319148936170213, "grad_norm": 55.87970733642578, "learning_rate": 4.788e-06, "loss": 9.5127, "step": 2000 }, { "epoch": 5.319148936170213, "eval_cer": 0.9999031273713612, "eval_loss": 9.15669059753418, "eval_runtime": 205.9181, "eval_samples_per_second": 24.092, "eval_steps_per_second": 3.016, "eval_wer": 1.0, "step": 2000 }, { "epoch": 5.585106382978723, "eval_cer": 0.9998627637760951, "eval_loss": 8.444498062133789, "eval_runtime": 207.3299, "eval_samples_per_second": 23.928, "eval_steps_per_second": 2.995, "eval_wer": 1.0, "step": 2100 }, { "epoch": 5.851063829787234, "eval_cer": 0.9999031273713612, "eval_loss": 7.757309913635254, "eval_runtime": 207.2514, "eval_samples_per_second": 23.937, "eval_steps_per_second": 2.996, "eval_wer": 1.0, "step": 2200 }, { "epoch": 6.117021276595745, "eval_cer": 0.9998627637760951, "eval_loss": 7.104926109313965, "eval_runtime": 207.5258, "eval_samples_per_second": 23.905, "eval_steps_per_second": 2.992, "eval_wer": 1.0, "step": 2300 }, { "epoch": 6.382978723404255, "eval_cer": 0.9998627637760951, "eval_loss": 6.5016021728515625, "eval_runtime": 207.6536, "eval_samples_per_second": 23.891, "eval_steps_per_second": 2.991, "eval_wer": 1.0, "step": 2400 }, { "epoch": 6.648936170212766, "grad_norm": 40.37785339355469, "learning_rate": 5.988e-06, "loss": 6.6873, "step": 2500 }, { "epoch": 6.648936170212766, "eval_cer": 0.9998627637760951, "eval_loss": 5.956511497497559, "eval_runtime": 208.8491, "eval_samples_per_second": 23.754, "eval_steps_per_second": 2.973, "eval_wer": 1.0, "step": 2500 }, { "epoch": 6.914893617021277, "eval_cer": 0.9998627637760951, "eval_loss": 5.485317707061768, "eval_runtime": 208.251, "eval_samples_per_second": 23.822, "eval_steps_per_second": 2.982, "eval_wer": 1.0, "step": 2600 }, { "epoch": 7.180851063829787, "eval_cer": 0.9998627637760951, "eval_loss": 5.099748134613037, "eval_runtime": 208.5061, "eval_samples_per_second": 23.793, "eval_steps_per_second": 2.978, "eval_wer": 1.0, "step": 2700 }, { "epoch": 7.446808510638298, "eval_cer": 0.9998627637760951, "eval_loss": 4.788908958435059, "eval_runtime": 208.0405, "eval_samples_per_second": 23.846, "eval_steps_per_second": 2.985, "eval_wer": 1.0, "step": 2800 }, { "epoch": 7.712765957446808, "eval_cer": 0.9998627637760951, "eval_loss": 4.557297706604004, "eval_runtime": 206.7999, "eval_samples_per_second": 23.989, "eval_steps_per_second": 3.003, "eval_wer": 1.0, "step": 2900 }, { "epoch": 7.9787234042553195, "grad_norm": 9.712457656860352, "learning_rate": 7.1880000000000005e-06, "loss": 4.7448, "step": 3000 }, { "epoch": 7.9787234042553195, "eval_cer": 0.9998627637760951, "eval_loss": 4.38894510269165, "eval_runtime": 208.3141, "eval_samples_per_second": 23.815, "eval_steps_per_second": 2.981, "eval_wer": 1.0, "step": 3000 }, { "epoch": 8.24468085106383, "eval_cer": 0.9998627637760951, "eval_loss": 4.261370658874512, "eval_runtime": 206.872, "eval_samples_per_second": 23.981, "eval_steps_per_second": 3.002, "eval_wer": 1.0, "step": 3100 }, { "epoch": 8.51063829787234, "eval_cer": 0.9998627637760951, "eval_loss": 4.196043014526367, "eval_runtime": 206.4767, "eval_samples_per_second": 24.027, "eval_steps_per_second": 3.008, "eval_wer": 1.0, "step": 3200 }, { "epoch": 8.77659574468085, "eval_cer": 0.9998627637760951, "eval_loss": 4.139795303344727, "eval_runtime": 206.4082, "eval_samples_per_second": 24.035, "eval_steps_per_second": 3.009, "eval_wer": 1.0, "step": 3300 }, { "epoch": 9.042553191489361, "eval_cer": 0.9998627637760951, "eval_loss": 4.1092000007629395, "eval_runtime": 205.1387, "eval_samples_per_second": 24.184, "eval_steps_per_second": 3.027, "eval_wer": 1.0, "step": 3400 }, { "epoch": 9.308510638297872, "grad_norm": 1.407253384590149, "learning_rate": 8.388e-06, "loss": 4.1253, "step": 3500 }, { "epoch": 9.308510638297872, "eval_cer": 0.9998627637760951, "eval_loss": 4.091124534606934, "eval_runtime": 206.2573, "eval_samples_per_second": 24.052, "eval_steps_per_second": 3.011, "eval_wer": 1.0, "step": 3500 }, { "epoch": 9.574468085106384, "eval_cer": 0.9998627637760951, "eval_loss": 4.085117816925049, "eval_runtime": 205.8565, "eval_samples_per_second": 24.099, "eval_steps_per_second": 3.017, "eval_wer": 1.0, "step": 3600 }, { "epoch": 9.840425531914894, "eval_cer": 0.9998627637760951, "eval_loss": 4.070712089538574, "eval_runtime": 206.7954, "eval_samples_per_second": 23.99, "eval_steps_per_second": 3.003, "eval_wer": 1.0, "step": 3700 }, { "epoch": 10.106382978723405, "eval_cer": 0.9998627637760951, "eval_loss": 4.062964916229248, "eval_runtime": 206.5725, "eval_samples_per_second": 24.016, "eval_steps_per_second": 3.006, "eval_wer": 1.0, "step": 3800 }, { "epoch": 10.372340425531915, "eval_cer": 0.9998627637760951, "eval_loss": 4.058863639831543, "eval_runtime": 206.6395, "eval_samples_per_second": 24.008, "eval_steps_per_second": 3.005, "eval_wer": 1.0, "step": 3900 }, { "epoch": 10.638297872340425, "grad_norm": 0.9817385077476501, "learning_rate": 9.588e-06, "loss": 4.0399, "step": 4000 }, { "epoch": 10.638297872340425, "eval_cer": 0.9998627637760951, "eval_loss": 4.0573530197143555, "eval_runtime": 207.8093, "eval_samples_per_second": 23.873, "eval_steps_per_second": 2.988, "eval_wer": 1.0, "step": 4000 }, { "epoch": 10.904255319148936, "eval_cer": 0.9998627637760951, "eval_loss": 4.049480438232422, "eval_runtime": 207.6873, "eval_samples_per_second": 23.887, "eval_steps_per_second": 2.99, "eval_wer": 1.0, "step": 4100 }, { "epoch": 11.170212765957446, "eval_cer": 0.9998627637760951, "eval_loss": 4.03674840927124, "eval_runtime": 207.2045, "eval_samples_per_second": 23.943, "eval_steps_per_second": 2.997, "eval_wer": 1.0, "step": 4200 }, { "epoch": 11.436170212765958, "eval_cer": 0.9998627637760951, "eval_loss": 4.0297136306762695, "eval_runtime": 207.9114, "eval_samples_per_second": 23.861, "eval_steps_per_second": 2.987, "eval_wer": 1.0, "step": 4300 }, { "epoch": 11.702127659574469, "eval_cer": 0.9998627637760951, "eval_loss": 4.01682186126709, "eval_runtime": 214.2137, "eval_samples_per_second": 23.159, "eval_steps_per_second": 2.899, "eval_wer": 1.0, "step": 4400 }, { "epoch": 11.96808510638298, "grad_norm": 1.0635989904403687, "learning_rate": 1.0787999999999999e-05, "loss": 4.0102, "step": 4500 }, { "epoch": 11.96808510638298, "eval_cer": 0.9999031273713612, "eval_loss": 4.000179290771484, "eval_runtime": 208.8628, "eval_samples_per_second": 23.752, "eval_steps_per_second": 2.973, "eval_wer": 1.0, "step": 4500 }, { "epoch": 12.23404255319149, "eval_cer": 0.9998627637760951, "eval_loss": 3.9823410511016846, "eval_runtime": 207.9568, "eval_samples_per_second": 23.856, "eval_steps_per_second": 2.986, "eval_wer": 1.0, "step": 4600 }, { "epoch": 12.5, "eval_cer": 0.9998627637760951, "eval_loss": 3.94744873046875, "eval_runtime": 207.1171, "eval_samples_per_second": 23.953, "eval_steps_per_second": 2.998, "eval_wer": 1.0, "step": 4700 }, { "epoch": 12.76595744680851, "eval_cer": 0.9998627637760951, "eval_loss": 3.88703989982605, "eval_runtime": 207.9782, "eval_samples_per_second": 23.853, "eval_steps_per_second": 2.986, "eval_wer": 1.0, "step": 4800 }, { "epoch": 13.03191489361702, "eval_cer": 0.9998627637760951, "eval_loss": 3.7933156490325928, "eval_runtime": 207.9844, "eval_samples_per_second": 23.853, "eval_steps_per_second": 2.986, "eval_wer": 1.0, "step": 4900 }, { "epoch": 13.297872340425531, "grad_norm": 2.227720260620117, "learning_rate": 1.1988000000000001e-05, "loss": 3.8616, "step": 5000 }, { "epoch": 13.297872340425531, "eval_cer": 0.9998627637760951, "eval_loss": 3.6575610637664795, "eval_runtime": 207.9134, "eval_samples_per_second": 23.861, "eval_steps_per_second": 2.987, "eval_wer": 1.0, "step": 5000 }, { "epoch": 13.563829787234042, "eval_cer": 0.9998627637760951, "eval_loss": 3.4925012588500977, "eval_runtime": 208.4794, "eval_samples_per_second": 23.796, "eval_steps_per_second": 2.979, "eval_wer": 1.0, "step": 5100 }, { "epoch": 13.829787234042554, "eval_cer": 0.9999031273713612, "eval_loss": 3.2550227642059326, "eval_runtime": 207.4858, "eval_samples_per_second": 23.91, "eval_steps_per_second": 2.993, "eval_wer": 1.0, "step": 5200 }, { "epoch": 14.095744680851064, "eval_cer": 0.8301338456819026, "eval_loss": 2.8836495876312256, "eval_runtime": 208.4061, "eval_samples_per_second": 23.804, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 5300 }, { "epoch": 14.361702127659575, "eval_cer": 0.6170624989909101, "eval_loss": 2.521071672439575, "eval_runtime": 209.1476, "eval_samples_per_second": 23.72, "eval_steps_per_second": 2.969, "eval_wer": 1.0, "step": 5400 }, { "epoch": 14.627659574468085, "grad_norm": 8.615595817565918, "learning_rate": 1.3188e-05, "loss": 3.023, "step": 5500 }, { "epoch": 14.627659574468085, "eval_cer": 0.5481053328382066, "eval_loss": 2.2902443408966064, "eval_runtime": 208.2855, "eval_samples_per_second": 23.818, "eval_steps_per_second": 2.981, "eval_wer": 1.0, "step": 5500 }, { "epoch": 14.893617021276595, "eval_cer": 0.5078628283578475, "eval_loss": 2.1006453037261963, "eval_runtime": 208.3604, "eval_samples_per_second": 23.81, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 5600 }, { "epoch": 15.159574468085106, "eval_cer": 0.47841354925165896, "eval_loss": 1.9463908672332764, "eval_runtime": 208.5724, "eval_samples_per_second": 23.786, "eval_steps_per_second": 2.977, "eval_wer": 1.0, "step": 5700 }, { "epoch": 15.425531914893616, "eval_cer": 0.45966869561005536, "eval_loss": 1.8196361064910889, "eval_runtime": 206.9409, "eval_samples_per_second": 23.973, "eval_steps_per_second": 3.001, "eval_wer": 1.0, "step": 5800 }, { "epoch": 15.691489361702128, "eval_cer": 0.4237531685422284, "eval_loss": 1.6974730491638184, "eval_runtime": 206.9424, "eval_samples_per_second": 23.973, "eval_steps_per_second": 3.001, "eval_wer": 1.0, "step": 5900 }, { "epoch": 15.957446808510639, "grad_norm": 11.626580238342285, "learning_rate": 1.4388000000000002e-05, "loss": 1.9348, "step": 6000 }, { "epoch": 15.957446808510639, "eval_cer": 0.4093110741559972, "eval_loss": 1.6040183305740356, "eval_runtime": 205.6257, "eval_samples_per_second": 24.126, "eval_steps_per_second": 3.02, "eval_wer": 1.0, "step": 6000 }, { "epoch": 16.22340425531915, "eval_cer": 0.40211828147956796, "eval_loss": 1.5034863948822021, "eval_runtime": 206.5176, "eval_samples_per_second": 24.022, "eval_steps_per_second": 3.007, "eval_wer": 1.0, "step": 6100 }, { "epoch": 16.48936170212766, "eval_cer": 0.3929638180732034, "eval_loss": 1.42111337184906, "eval_runtime": 206.0968, "eval_samples_per_second": 24.071, "eval_steps_per_second": 3.013, "eval_wer": 1.0, "step": 6200 }, { "epoch": 16.75531914893617, "eval_cer": 0.3802250674072041, "eval_loss": 1.3529201745986938, "eval_runtime": 207.5275, "eval_samples_per_second": 23.905, "eval_steps_per_second": 2.992, "eval_wer": 1.0, "step": 6300 }, { "epoch": 17.02127659574468, "eval_cer": 0.3790545231444855, "eval_loss": 1.2795100212097168, "eval_runtime": 210.6768, "eval_samples_per_second": 23.548, "eval_steps_per_second": 2.948, "eval_wer": 1.0, "step": 6400 }, { "epoch": 17.28723404255319, "grad_norm": 8.535591125488281, "learning_rate": 1.5588e-05, "loss": 1.4128, "step": 6500 }, { "epoch": 17.28723404255319, "eval_cer": 0.3710706040008396, "eval_loss": 1.219308614730835, "eval_runtime": 207.1346, "eval_samples_per_second": 23.951, "eval_steps_per_second": 2.998, "eval_wer": 1.0, "step": 6500 }, { "epoch": 17.5531914893617, "eval_cer": 0.3673571532363531, "eval_loss": 1.1645617485046387, "eval_runtime": 207.4547, "eval_samples_per_second": 23.914, "eval_steps_per_second": 2.993, "eval_wer": 1.0, "step": 6600 }, { "epoch": 17.819148936170212, "eval_cer": 0.37062660445291185, "eval_loss": 1.1192774772644043, "eval_runtime": 209.6296, "eval_samples_per_second": 23.666, "eval_steps_per_second": 2.962, "eval_wer": 1.0, "step": 6700 }, { "epoch": 18.085106382978722, "eval_cer": 0.3605841419506918, "eval_loss": 1.066541075706482, "eval_runtime": 212.2728, "eval_samples_per_second": 23.371, "eval_steps_per_second": 2.925, "eval_wer": 1.0, "step": 6800 }, { "epoch": 18.351063829787233, "eval_cer": 0.3590422526115246, "eval_loss": 1.0244266986846924, "eval_runtime": 211.5467, "eval_samples_per_second": 23.451, "eval_steps_per_second": 2.936, "eval_wer": 0.9997984277363435, "step": 6900 }, { "epoch": 18.617021276595743, "grad_norm": 6.8628058433532715, "learning_rate": 1.6788e-05, "loss": 1.1012, "step": 7000 }, { "epoch": 18.617021276595743, "eval_cer": 0.35400487592230817, "eval_loss": 0.9863778352737427, "eval_runtime": 210.3223, "eval_samples_per_second": 23.588, "eval_steps_per_second": 2.953, "eval_wer": 1.0, "step": 7000 }, { "epoch": 18.882978723404257, "eval_cer": 0.3553691654423043, "eval_loss": 0.9577982425689697, "eval_runtime": 208.8403, "eval_samples_per_second": 23.755, "eval_steps_per_second": 2.974, "eval_wer": 1.0, "step": 7100 }, { "epoch": 19.148936170212767, "eval_cer": 0.350896879086814, "eval_loss": 0.9308760762214661, "eval_runtime": 211.4474, "eval_samples_per_second": 23.462, "eval_steps_per_second": 2.937, "eval_wer": 0.9997984277363435, "step": 7200 }, { "epoch": 19.414893617021278, "eval_cer": 0.3495083714096582, "eval_loss": 0.9070402383804321, "eval_runtime": 213.7352, "eval_samples_per_second": 23.211, "eval_steps_per_second": 2.905, "eval_wer": 1.0, "step": 7300 }, { "epoch": 19.680851063829788, "eval_cer": 0.3470461920984226, "eval_loss": 0.8692798018455505, "eval_runtime": 203.6485, "eval_samples_per_second": 24.361, "eval_steps_per_second": 3.049, "eval_wer": 0.9997984277363435, "step": 7400 }, { "epoch": 19.9468085106383, "grad_norm": 5.451691150665283, "learning_rate": 1.7988e-05, "loss": 0.9083, "step": 7500 }, { "epoch": 19.9468085106383, "eval_cer": 0.34493921242552916, "eval_loss": 0.8492410778999329, "eval_runtime": 202.6575, "eval_samples_per_second": 24.48, "eval_steps_per_second": 3.064, "eval_wer": 1.0, "step": 7500 }, { "epoch": 20.21276595744681, "eval_cer": 0.34490692154931624, "eval_loss": 0.8214272856712341, "eval_runtime": 204.0849, "eval_samples_per_second": 24.309, "eval_steps_per_second": 3.043, "eval_wer": 1.0, "step": 7600 }, { "epoch": 20.47872340425532, "eval_cer": 0.35004117086717146, "eval_loss": 0.821138322353363, "eval_runtime": 205.8454, "eval_samples_per_second": 24.101, "eval_steps_per_second": 3.017, "eval_wer": 1.0, "step": 7700 }, { "epoch": 20.74468085106383, "eval_cer": 0.34516524855901964, "eval_loss": 0.7964152693748474, "eval_runtime": 205.6355, "eval_samples_per_second": 24.125, "eval_steps_per_second": 3.02, "eval_wer": 1.0, "step": 7800 }, { "epoch": 21.01063829787234, "eval_cer": 0.34293717810032776, "eval_loss": 0.77970951795578, "eval_runtime": 207.2008, "eval_samples_per_second": 23.943, "eval_steps_per_second": 2.997, "eval_wer": 1.0, "step": 7900 }, { "epoch": 21.27659574468085, "grad_norm": 5.198563098907471, "learning_rate": 1.9188e-05, "loss": 0.7546, "step": 8000 }, { "epoch": 21.27659574468085, "eval_cer": 0.34003907196021765, "eval_loss": 0.7633516788482666, "eval_runtime": 205.0619, "eval_samples_per_second": 24.193, "eval_steps_per_second": 3.028, "eval_wer": 1.0, "step": 8000 }, { "epoch": 21.54255319148936, "eval_cer": 0.33842452814957136, "eval_loss": 0.7471381425857544, "eval_runtime": 211.6646, "eval_samples_per_second": 23.438, "eval_steps_per_second": 2.934, "eval_wer": 1.0, "step": 8100 }, { "epoch": 21.80851063829787, "eval_cer": 0.33775449246815314, "eval_loss": 0.7400262355804443, "eval_runtime": 213.7944, "eval_samples_per_second": 23.205, "eval_steps_per_second": 2.905, "eval_wer": 1.0, "step": 8200 }, { "epoch": 22.074468085106382, "eval_cer": 0.33898961848329756, "eval_loss": 0.7214083075523376, "eval_runtime": 205.5478, "eval_samples_per_second": 24.135, "eval_steps_per_second": 3.021, "eval_wer": 1.0, "step": 8300 }, { "epoch": 22.340425531914892, "eval_cer": 0.33750423817750297, "eval_loss": 0.7061555981636047, "eval_runtime": 211.7804, "eval_samples_per_second": 23.425, "eval_steps_per_second": 2.932, "eval_wer": 0.9997984277363435, "step": 8400 }, { "epoch": 22.606382978723403, "grad_norm": 6.07712459564209, "learning_rate": 2.0388e-05, "loss": 0.651, "step": 8500 }, { "epoch": 22.606382978723403, "eval_cer": 0.33436395046579587, "eval_loss": 0.6972677111625671, "eval_runtime": 204.7478, "eval_samples_per_second": 24.23, "eval_steps_per_second": 3.033, "eval_wer": 1.0, "step": 8500 }, { "epoch": 22.872340425531917, "eval_cer": 0.33436395046579587, "eval_loss": 0.6929803490638733, "eval_runtime": 205.6125, "eval_samples_per_second": 24.128, "eval_steps_per_second": 3.02, "eval_wer": 0.9997984277363435, "step": 8600 }, { "epoch": 23.138297872340427, "eval_cer": 0.33500169527100115, "eval_loss": 0.6829419136047363, "eval_runtime": 207.2537, "eval_samples_per_second": 23.937, "eval_steps_per_second": 2.996, "eval_wer": 1.0, "step": 8700 }, { "epoch": 23.404255319148938, "eval_cer": 0.3331691880459176, "eval_loss": 0.6683320999145508, "eval_runtime": 206.9839, "eval_samples_per_second": 23.968, "eval_steps_per_second": 3.0, "eval_wer": 1.0, "step": 8800 }, { "epoch": 23.670212765957448, "eval_cer": 0.33223275263574276, "eval_loss": 0.6596328616142273, "eval_runtime": 216.6308, "eval_samples_per_second": 22.901, "eval_steps_per_second": 2.867, "eval_wer": 0.9997984277363435, "step": 8900 }, { "epoch": 23.93617021276596, "grad_norm": 2.6311779022216797, "learning_rate": 2.1588e-05, "loss": 0.5868, "step": 9000 }, { "epoch": 23.93617021276596, "eval_cer": 0.3320712982546781, "eval_loss": 0.6764070987701416, "eval_runtime": 207.8654, "eval_samples_per_second": 23.866, "eval_steps_per_second": 2.988, "eval_wer": 1.0, "step": 9000 }, { "epoch": 24.20212765957447, "eval_cer": 0.3308280995204805, "eval_loss": 0.6634973883628845, "eval_runtime": 207.5025, "eval_samples_per_second": 23.908, "eval_steps_per_second": 2.993, "eval_wer": 0.9997984277363435, "step": 9100 }, { "epoch": 24.46808510638298, "eval_cer": 0.3323699888596477, "eval_loss": 0.6560051441192627, "eval_runtime": 207.3198, "eval_samples_per_second": 23.929, "eval_steps_per_second": 2.995, "eval_wer": 1.0, "step": 9200 }, { "epoch": 24.73404255319149, "eval_cer": 0.3290198104525566, "eval_loss": 0.6412243843078613, "eval_runtime": 207.1443, "eval_samples_per_second": 23.949, "eval_steps_per_second": 2.998, "eval_wer": 1.0, "step": 9300 }, { "epoch": 25.0, "eval_cer": 0.33074737232994816, "eval_loss": 0.6322699785232544, "eval_runtime": 205.6853, "eval_samples_per_second": 24.119, "eval_steps_per_second": 3.019, "eval_wer": 1.0, "step": 9400 }, { "epoch": 25.0, "step": 9400, "total_flos": 1.314337503834538e+19, "train_loss": 5.653992247885846, "train_runtime": 50640.2984, "train_samples_per_second": 5.94, "train_steps_per_second": 0.186 } ], "logging_steps": 500, "max_steps": 9400, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.314337503834538e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }