diff --git "a/checkpoint-17000/trainer_state.json" "b/checkpoint-17000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-17000/trainer_state.json" @@ -0,0 +1,12273 @@ +{ + "best_metric": 8.832933653077538, + "best_model_checkpoint": "kotoba_v2_enc_logs_epoch2_2/checkpoint-15000", + "epoch": 0.05825389101725, + "eval_steps": 500, + "global_step": 17000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.4266994716029415e-05, + "grad_norm": 1.0561553239822388, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 10 + }, + { + "epoch": 6.853398943205883e-05, + "grad_norm": 1.1626238822937012, + "learning_rate": 1e-05, + "loss": 0.2265, + "step": 20 + }, + { + "epoch": 0.00010280098414808825, + "grad_norm": 0.9845689535140991, + "learning_rate": 1e-05, + "loss": 0.2279, + "step": 30 + }, + { + "epoch": 0.00013706797886411766, + "grad_norm": 1.142356276512146, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 40 + }, + { + "epoch": 0.00017133497358014707, + "grad_norm": 1.0053240060806274, + "learning_rate": 1e-05, + "loss": 0.2473, + "step": 50 + }, + { + "epoch": 0.0002056019682961765, + "grad_norm": 1.1098105907440186, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 60 + }, + { + "epoch": 0.0002398689630122059, + "grad_norm": 1.191983699798584, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 70 + }, + { + "epoch": 0.0002741359577282353, + "grad_norm": 1.1295104026794434, + "learning_rate": 1e-05, + "loss": 0.2362, + "step": 80 + }, + { + "epoch": 0.0003084029524442647, + "grad_norm": 1.037972092628479, + "learning_rate": 1e-05, + "loss": 0.2455, + "step": 90 + }, + { + "epoch": 0.00034266994716029413, + "grad_norm": 1.1975648403167725, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 100 + }, + { + "epoch": 0.00037693694187632354, + "grad_norm": 1.0676342248916626, + "learning_rate": 1e-05, + "loss": 0.2271, + "step": 110 + }, + { + "epoch": 0.000411203936592353, + "grad_norm": 1.0749495029449463, + "learning_rate": 1e-05, + "loss": 0.2417, + "step": 120 + }, + { + "epoch": 0.0004454709313083824, + "grad_norm": 1.094260811805725, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 130 + }, + { + "epoch": 0.0004797379260244118, + "grad_norm": 1.0395853519439697, + "learning_rate": 1e-05, + "loss": 0.2381, + "step": 140 + }, + { + "epoch": 0.0005140049207404412, + "grad_norm": 1.2008885145187378, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 150 + }, + { + "epoch": 0.0005482719154564706, + "grad_norm": 1.0647832155227661, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 160 + }, + { + "epoch": 0.0005825389101725, + "grad_norm": 1.327071189880371, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 170 + }, + { + "epoch": 0.0006168059048885295, + "grad_norm": 1.1184055805206299, + "learning_rate": 1e-05, + "loss": 0.2242, + "step": 180 + }, + { + "epoch": 0.0006510728996045589, + "grad_norm": 1.2512784004211426, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 190 + }, + { + "epoch": 0.0006853398943205883, + "grad_norm": 1.0614465475082397, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 200 + }, + { + "epoch": 0.0007196068890366177, + "grad_norm": 1.0607149600982666, + "learning_rate": 1e-05, + "loss": 0.2381, + "step": 210 + }, + { + "epoch": 0.0007538738837526471, + "grad_norm": 1.0422028303146362, + "learning_rate": 1e-05, + "loss": 0.2294, + "step": 220 + }, + { + "epoch": 0.0007881408784686765, + "grad_norm": 1.0162984132766724, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 230 + }, + { + "epoch": 0.000822407873184706, + "grad_norm": 1.1085543632507324, + "learning_rate": 1e-05, + "loss": 0.2161, + "step": 240 + }, + { + "epoch": 0.0008566748679007354, + "grad_norm": 1.1854636669158936, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 250 + }, + { + "epoch": 0.0008909418626167648, + "grad_norm": 1.40137779712677, + "learning_rate": 1e-05, + "loss": 0.2579, + "step": 260 + }, + { + "epoch": 0.0009252088573327942, + "grad_norm": 1.0814112424850464, + "learning_rate": 1e-05, + "loss": 0.2612, + "step": 270 + }, + { + "epoch": 0.0009594758520488236, + "grad_norm": 1.083736538887024, + "learning_rate": 1e-05, + "loss": 0.2711, + "step": 280 + }, + { + "epoch": 0.000993742846764853, + "grad_norm": 1.0861411094665527, + "learning_rate": 1e-05, + "loss": 0.2642, + "step": 290 + }, + { + "epoch": 0.0010280098414808825, + "grad_norm": 1.1141265630722046, + "learning_rate": 1e-05, + "loss": 0.2585, + "step": 300 + }, + { + "epoch": 0.0010622768361969119, + "grad_norm": 1.326241374015808, + "learning_rate": 1e-05, + "loss": 0.2858, + "step": 310 + }, + { + "epoch": 0.0010965438309129413, + "grad_norm": 1.393750786781311, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 320 + }, + { + "epoch": 0.0011308108256289707, + "grad_norm": 1.0851459503173828, + "learning_rate": 1e-05, + "loss": 0.2565, + "step": 330 + }, + { + "epoch": 0.001165077820345, + "grad_norm": 1.2323757410049438, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 340 + }, + { + "epoch": 0.0011993448150610295, + "grad_norm": 1.376953125, + "learning_rate": 1e-05, + "loss": 0.2671, + "step": 350 + }, + { + "epoch": 0.001233611809777059, + "grad_norm": 1.084592580795288, + "learning_rate": 1e-05, + "loss": 0.2643, + "step": 360 + }, + { + "epoch": 0.0012678788044930883, + "grad_norm": 1.2907005548477173, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 370 + }, + { + "epoch": 0.0013021457992091177, + "grad_norm": 1.0698130130767822, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 380 + }, + { + "epoch": 0.0013364127939251471, + "grad_norm": 1.1399807929992676, + "learning_rate": 1e-05, + "loss": 0.2759, + "step": 390 + }, + { + "epoch": 0.0013706797886411765, + "grad_norm": 1.1480791568756104, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 400 + }, + { + "epoch": 0.001404946783357206, + "grad_norm": 1.3095237016677856, + "learning_rate": 1e-05, + "loss": 0.2536, + "step": 410 + }, + { + "epoch": 0.0014392137780732353, + "grad_norm": 1.068246841430664, + "learning_rate": 1e-05, + "loss": 0.2604, + "step": 420 + }, + { + "epoch": 0.0014734807727892648, + "grad_norm": 1.2310419082641602, + "learning_rate": 1e-05, + "loss": 0.2632, + "step": 430 + }, + { + "epoch": 0.0015077477675052942, + "grad_norm": 1.161867380142212, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 440 + }, + { + "epoch": 0.0015420147622213236, + "grad_norm": 1.1461217403411865, + "learning_rate": 1e-05, + "loss": 0.2592, + "step": 450 + }, + { + "epoch": 0.001576281756937353, + "grad_norm": 1.3006030321121216, + "learning_rate": 1e-05, + "loss": 0.2607, + "step": 460 + }, + { + "epoch": 0.0016105487516533824, + "grad_norm": 1.1223125457763672, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 470 + }, + { + "epoch": 0.001644815746369412, + "grad_norm": 1.2909380197525024, + "learning_rate": 1e-05, + "loss": 0.2693, + "step": 480 + }, + { + "epoch": 0.0016790827410854414, + "grad_norm": 1.2270597219467163, + "learning_rate": 1e-05, + "loss": 0.2661, + "step": 490 + }, + { + "epoch": 0.0017133497358014708, + "grad_norm": 1.1439770460128784, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 500 + }, + { + "epoch": 0.0017133497358014708, + "eval_cer": 13.0358087846181, + "eval_loss": 0.25224336981773376, + "eval_normalized_cer": 9.4224620303757, + "eval_runtime": 227.2174, + "eval_samples_per_second": 2.253, + "eval_steps_per_second": 0.035, + "step": 500 + }, + { + "epoch": 0.0017476167305175002, + "grad_norm": 1.1377454996109009, + "learning_rate": 1e-05, + "loss": 0.2579, + "step": 510 + }, + { + "epoch": 0.0017818837252335296, + "grad_norm": 1.2096498012542725, + "learning_rate": 1e-05, + "loss": 0.2727, + "step": 520 + }, + { + "epoch": 0.001816150719949559, + "grad_norm": 1.187213659286499, + "learning_rate": 1e-05, + "loss": 0.2562, + "step": 530 + }, + { + "epoch": 0.0018504177146655885, + "grad_norm": 0.969393253326416, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 540 + }, + { + "epoch": 0.0018846847093816179, + "grad_norm": 0.9745528697967529, + "learning_rate": 1e-05, + "loss": 0.2774, + "step": 550 + }, + { + "epoch": 0.0019189517040976473, + "grad_norm": 1.0725352764129639, + "learning_rate": 1e-05, + "loss": 0.2541, + "step": 560 + }, + { + "epoch": 0.0019532186988136767, + "grad_norm": 1.217871904373169, + "learning_rate": 1e-05, + "loss": 0.2395, + "step": 570 + }, + { + "epoch": 0.001987485693529706, + "grad_norm": 1.3582627773284912, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 580 + }, + { + "epoch": 0.0020217526882457355, + "grad_norm": 1.2415379285812378, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 590 + }, + { + "epoch": 0.002056019682961765, + "grad_norm": 0.9810131192207336, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 600 + }, + { + "epoch": 0.0020902866776777943, + "grad_norm": 0.9806564450263977, + "learning_rate": 1e-05, + "loss": 0.2688, + "step": 610 + }, + { + "epoch": 0.0021245536723938237, + "grad_norm": 1.2755467891693115, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 620 + }, + { + "epoch": 0.002158820667109853, + "grad_norm": 0.9300326704978943, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 630 + }, + { + "epoch": 0.0021930876618258825, + "grad_norm": 1.1276524066925049, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 640 + }, + { + "epoch": 0.002227354656541912, + "grad_norm": 1.1786876916885376, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 650 + }, + { + "epoch": 0.0022616216512579414, + "grad_norm": 1.1702712774276733, + "learning_rate": 1e-05, + "loss": 0.2627, + "step": 660 + }, + { + "epoch": 0.0022958886459739708, + "grad_norm": 1.2837899923324585, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 670 + }, + { + "epoch": 0.00233015564069, + "grad_norm": 1.0623608827590942, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 680 + }, + { + "epoch": 0.0023644226354060296, + "grad_norm": 1.1288243532180786, + "learning_rate": 1e-05, + "loss": 0.2773, + "step": 690 + }, + { + "epoch": 0.002398689630122059, + "grad_norm": 1.0192692279815674, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 700 + }, + { + "epoch": 0.0024329566248380884, + "grad_norm": 1.2274680137634277, + "learning_rate": 1e-05, + "loss": 0.2345, + "step": 710 + }, + { + "epoch": 0.002467223619554118, + "grad_norm": 1.240645170211792, + "learning_rate": 1e-05, + "loss": 0.2624, + "step": 720 + }, + { + "epoch": 0.002501490614270147, + "grad_norm": 1.0681366920471191, + "learning_rate": 1e-05, + "loss": 0.2553, + "step": 730 + }, + { + "epoch": 0.0025357576089861766, + "grad_norm": 1.0161867141723633, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 740 + }, + { + "epoch": 0.002570024603702206, + "grad_norm": 1.2384017705917358, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 750 + }, + { + "epoch": 0.0026042915984182354, + "grad_norm": 1.1739261150360107, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 760 + }, + { + "epoch": 0.002638558593134265, + "grad_norm": 1.0396535396575928, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 770 + }, + { + "epoch": 0.0026728255878502943, + "grad_norm": 1.14767324924469, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 780 + }, + { + "epoch": 0.0027070925825663237, + "grad_norm": 1.1783303022384644, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 790 + }, + { + "epoch": 0.002741359577282353, + "grad_norm": 1.1065645217895508, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 800 + }, + { + "epoch": 0.0027756265719983825, + "grad_norm": 1.256645917892456, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 810 + }, + { + "epoch": 0.002809893566714412, + "grad_norm": 1.058158278465271, + "learning_rate": 1e-05, + "loss": 0.257, + "step": 820 + }, + { + "epoch": 0.0028441605614304413, + "grad_norm": 1.0647656917572021, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 830 + }, + { + "epoch": 0.0028784275561464707, + "grad_norm": 1.1984691619873047, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 840 + }, + { + "epoch": 0.0029126945508625, + "grad_norm": 1.1380070447921753, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 850 + }, + { + "epoch": 0.0029469615455785295, + "grad_norm": 1.2131065130233765, + "learning_rate": 1e-05, + "loss": 0.242, + "step": 860 + }, + { + "epoch": 0.002981228540294559, + "grad_norm": 1.1822234392166138, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 870 + }, + { + "epoch": 0.0030154955350105883, + "grad_norm": 1.0591018199920654, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 880 + }, + { + "epoch": 0.0030497625297266177, + "grad_norm": 1.2318428754806519, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 890 + }, + { + "epoch": 0.003084029524442647, + "grad_norm": 1.0146839618682861, + "learning_rate": 1e-05, + "loss": 0.2609, + "step": 900 + }, + { + "epoch": 0.0031182965191586766, + "grad_norm": 1.1508561372756958, + "learning_rate": 1e-05, + "loss": 0.2541, + "step": 910 + }, + { + "epoch": 0.003152563513874706, + "grad_norm": 1.1494849920272827, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 920 + }, + { + "epoch": 0.0031868305085907354, + "grad_norm": 1.2423807382583618, + "learning_rate": 1e-05, + "loss": 0.2573, + "step": 930 + }, + { + "epoch": 0.0032210975033067648, + "grad_norm": 1.2714438438415527, + "learning_rate": 1e-05, + "loss": 0.2545, + "step": 940 + }, + { + "epoch": 0.0032553644980227946, + "grad_norm": 1.2088007926940918, + "learning_rate": 1e-05, + "loss": 0.2773, + "step": 950 + }, + { + "epoch": 0.003289631492738824, + "grad_norm": 1.0737963914871216, + "learning_rate": 1e-05, + "loss": 0.2495, + "step": 960 + }, + { + "epoch": 0.0033238984874548534, + "grad_norm": 1.0942472219467163, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 970 + }, + { + "epoch": 0.003358165482170883, + "grad_norm": 1.1282986402511597, + "learning_rate": 1e-05, + "loss": 0.2638, + "step": 980 + }, + { + "epoch": 0.0033924324768869123, + "grad_norm": 1.0762425661087036, + "learning_rate": 1e-05, + "loss": 0.2619, + "step": 990 + }, + { + "epoch": 0.0034266994716029417, + "grad_norm": 1.09200119972229, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 1000 + }, + { + "epoch": 0.0034266994716029417, + "eval_cer": 13.80313988357735, + "eval_loss": 0.25397512316703796, + "eval_normalized_cer": 9.952038369304557, + "eval_runtime": 227.5088, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 0.035, + "step": 1000 + }, + { + "epoch": 0.003460966466318971, + "grad_norm": 0.9681844711303711, + "learning_rate": 1e-05, + "loss": 0.2567, + "step": 1010 + }, + { + "epoch": 0.0034952334610350005, + "grad_norm": 1.0064711570739746, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 1020 + }, + { + "epoch": 0.00352950045575103, + "grad_norm": 1.190294623374939, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 1030 + }, + { + "epoch": 0.0035637674504670593, + "grad_norm": 1.332492709159851, + "learning_rate": 1e-05, + "loss": 0.2725, + "step": 1040 + }, + { + "epoch": 0.0035980344451830887, + "grad_norm": 1.1110397577285767, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 1050 + }, + { + "epoch": 0.003632301439899118, + "grad_norm": 1.2327215671539307, + "learning_rate": 1e-05, + "loss": 0.2733, + "step": 1060 + }, + { + "epoch": 0.0036665684346151475, + "grad_norm": 1.1694815158843994, + "learning_rate": 1e-05, + "loss": 0.2611, + "step": 1070 + }, + { + "epoch": 0.003700835429331177, + "grad_norm": 1.212570309638977, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 1080 + }, + { + "epoch": 0.0037351024240472063, + "grad_norm": 1.1467297077178955, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 1090 + }, + { + "epoch": 0.0037693694187632357, + "grad_norm": 0.9628469347953796, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 1100 + }, + { + "epoch": 0.003803636413479265, + "grad_norm": 1.1593494415283203, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 1110 + }, + { + "epoch": 0.0038379034081952946, + "grad_norm": 1.1376386880874634, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 1120 + }, + { + "epoch": 0.003872170402911324, + "grad_norm": 1.129338026046753, + "learning_rate": 1e-05, + "loss": 0.2601, + "step": 1130 + }, + { + "epoch": 0.003906437397627353, + "grad_norm": 1.0889575481414795, + "learning_rate": 1e-05, + "loss": 0.2455, + "step": 1140 + }, + { + "epoch": 0.003940704392343382, + "grad_norm": 1.1437270641326904, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 1150 + }, + { + "epoch": 0.003974971387059412, + "grad_norm": 1.0283392667770386, + "learning_rate": 1e-05, + "loss": 0.2507, + "step": 1160 + }, + { + "epoch": 0.004009238381775441, + "grad_norm": 1.130747675895691, + "learning_rate": 1e-05, + "loss": 0.2715, + "step": 1170 + }, + { + "epoch": 0.004043505376491471, + "grad_norm": 1.3483778238296509, + "learning_rate": 1e-05, + "loss": 0.2742, + "step": 1180 + }, + { + "epoch": 0.0040777723712075, + "grad_norm": 1.0879924297332764, + "learning_rate": 1e-05, + "loss": 0.2641, + "step": 1190 + }, + { + "epoch": 0.00411203936592353, + "grad_norm": 1.1242927312850952, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 1200 + }, + { + "epoch": 0.004146306360639559, + "grad_norm": 1.0185858011245728, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 1210 + }, + { + "epoch": 0.004180573355355589, + "grad_norm": 0.9555259943008423, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 1220 + }, + { + "epoch": 0.004214840350071618, + "grad_norm": 1.210371971130371, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 1230 + }, + { + "epoch": 0.0042491073447876474, + "grad_norm": 1.1261368989944458, + "learning_rate": 1e-05, + "loss": 0.2551, + "step": 1240 + }, + { + "epoch": 0.004283374339503676, + "grad_norm": 1.2142603397369385, + "learning_rate": 1e-05, + "loss": 0.264, + "step": 1250 + }, + { + "epoch": 0.004317641334219706, + "grad_norm": 1.057758092880249, + "learning_rate": 1e-05, + "loss": 0.2587, + "step": 1260 + }, + { + "epoch": 0.004351908328935736, + "grad_norm": 1.0871245861053467, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 1270 + }, + { + "epoch": 0.004386175323651765, + "grad_norm": 1.1214648485183716, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 1280 + }, + { + "epoch": 0.004420442318367795, + "grad_norm": 1.0265707969665527, + "learning_rate": 1e-05, + "loss": 0.2123, + "step": 1290 + }, + { + "epoch": 0.004454709313083824, + "grad_norm": 1.1180216073989868, + "learning_rate": 1e-05, + "loss": 0.2245, + "step": 1300 + }, + { + "epoch": 0.004488976307799854, + "grad_norm": 1.028238296508789, + "learning_rate": 1e-05, + "loss": 0.2118, + "step": 1310 + }, + { + "epoch": 0.004523243302515883, + "grad_norm": 1.0321682691574097, + "learning_rate": 1e-05, + "loss": 0.2196, + "step": 1320 + }, + { + "epoch": 0.0045575102972319126, + "grad_norm": 1.1180269718170166, + "learning_rate": 1e-05, + "loss": 0.2403, + "step": 1330 + }, + { + "epoch": 0.0045917772919479415, + "grad_norm": 1.079560399055481, + "learning_rate": 1e-05, + "loss": 0.2309, + "step": 1340 + }, + { + "epoch": 0.004626044286663971, + "grad_norm": 1.0062284469604492, + "learning_rate": 1e-05, + "loss": 0.228, + "step": 1350 + }, + { + "epoch": 0.00466031128138, + "grad_norm": 1.1098395586013794, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 1360 + }, + { + "epoch": 0.00469457827609603, + "grad_norm": 1.0619688034057617, + "learning_rate": 1e-05, + "loss": 0.2342, + "step": 1370 + }, + { + "epoch": 0.004728845270812059, + "grad_norm": 1.1943925619125366, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 1380 + }, + { + "epoch": 0.004763112265528089, + "grad_norm": 1.0958552360534668, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 1390 + }, + { + "epoch": 0.004797379260244118, + "grad_norm": 1.0984197854995728, + "learning_rate": 1e-05, + "loss": 0.2208, + "step": 1400 + }, + { + "epoch": 0.004831646254960148, + "grad_norm": 1.0741859674453735, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 1410 + }, + { + "epoch": 0.004865913249676177, + "grad_norm": 1.1457058191299438, + "learning_rate": 1e-05, + "loss": 0.2516, + "step": 1420 + }, + { + "epoch": 0.004900180244392207, + "grad_norm": 0.9849014282226562, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 1430 + }, + { + "epoch": 0.004934447239108236, + "grad_norm": 1.1174912452697754, + "learning_rate": 1e-05, + "loss": 0.2122, + "step": 1440 + }, + { + "epoch": 0.0049687142338242654, + "grad_norm": 1.0292854309082031, + "learning_rate": 1e-05, + "loss": 0.2349, + "step": 1450 + }, + { + "epoch": 0.005002981228540294, + "grad_norm": 1.0343785285949707, + "learning_rate": 1e-05, + "loss": 0.2158, + "step": 1460 + }, + { + "epoch": 0.005037248223256324, + "grad_norm": 1.1178008317947388, + "learning_rate": 1e-05, + "loss": 0.2264, + "step": 1470 + }, + { + "epoch": 0.005071515217972353, + "grad_norm": 1.0238450765609741, + "learning_rate": 1e-05, + "loss": 0.2287, + "step": 1480 + }, + { + "epoch": 0.005105782212688383, + "grad_norm": 1.1728886365890503, + "learning_rate": 1e-05, + "loss": 0.2373, + "step": 1490 + }, + { + "epoch": 0.005140049207404412, + "grad_norm": 1.227034091949463, + "learning_rate": 1e-05, + "loss": 0.222, + "step": 1500 + }, + { + "epoch": 0.005140049207404412, + "eval_cer": 13.150467454577527, + "eval_loss": 0.25801682472229004, + "eval_normalized_cer": 9.452438049560353, + "eval_runtime": 227.9378, + "eval_samples_per_second": 2.246, + "eval_steps_per_second": 0.035, + "step": 1500 + }, + { + "epoch": 0.005174316202120442, + "grad_norm": 1.0703920125961304, + "learning_rate": 1e-05, + "loss": 0.2156, + "step": 1510 + }, + { + "epoch": 0.005208583196836471, + "grad_norm": 1.1343841552734375, + "learning_rate": 1e-05, + "loss": 0.2126, + "step": 1520 + }, + { + "epoch": 0.005242850191552501, + "grad_norm": 1.1743741035461426, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 1530 + }, + { + "epoch": 0.00527711718626853, + "grad_norm": 1.1476744413375854, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 1540 + }, + { + "epoch": 0.0053113841809845595, + "grad_norm": 1.0899590253829956, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 1550 + }, + { + "epoch": 0.0053456511757005885, + "grad_norm": 1.0281250476837158, + "learning_rate": 1e-05, + "loss": 0.2226, + "step": 1560 + }, + { + "epoch": 0.005379918170416618, + "grad_norm": 0.9932867884635925, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 1570 + }, + { + "epoch": 0.005414185165132647, + "grad_norm": 1.1992309093475342, + "learning_rate": 1e-05, + "loss": 0.2179, + "step": 1580 + }, + { + "epoch": 0.005448452159848677, + "grad_norm": 1.0017774105072021, + "learning_rate": 1e-05, + "loss": 0.2244, + "step": 1590 + }, + { + "epoch": 0.005482719154564706, + "grad_norm": 1.0827686786651611, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 1600 + }, + { + "epoch": 0.005516986149280736, + "grad_norm": 1.2260409593582153, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 1610 + }, + { + "epoch": 0.005551253143996765, + "grad_norm": 1.2530804872512817, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 1620 + }, + { + "epoch": 0.005585520138712795, + "grad_norm": 1.068452000617981, + "learning_rate": 1e-05, + "loss": 0.2138, + "step": 1630 + }, + { + "epoch": 0.005619787133428824, + "grad_norm": 1.3108712434768677, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 1640 + }, + { + "epoch": 0.005654054128144854, + "grad_norm": 1.0919209718704224, + "learning_rate": 1e-05, + "loss": 0.213, + "step": 1650 + }, + { + "epoch": 0.005688321122860883, + "grad_norm": 1.1530914306640625, + "learning_rate": 1e-05, + "loss": 0.2292, + "step": 1660 + }, + { + "epoch": 0.005722588117576912, + "grad_norm": 1.084028959274292, + "learning_rate": 1e-05, + "loss": 0.2393, + "step": 1670 + }, + { + "epoch": 0.005756855112292941, + "grad_norm": 1.247847557067871, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 1680 + }, + { + "epoch": 0.005791122107008971, + "grad_norm": 1.03806734085083, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 1690 + }, + { + "epoch": 0.005825389101725, + "grad_norm": 1.1643092632293701, + "learning_rate": 1e-05, + "loss": 0.2348, + "step": 1700 + }, + { + "epoch": 0.00585965609644103, + "grad_norm": 1.1066207885742188, + "learning_rate": 1e-05, + "loss": 0.2348, + "step": 1710 + }, + { + "epoch": 0.005893923091157059, + "grad_norm": 1.1813760995864868, + "learning_rate": 1e-05, + "loss": 0.2295, + "step": 1720 + }, + { + "epoch": 0.005928190085873089, + "grad_norm": 1.1444518566131592, + "learning_rate": 1e-05, + "loss": 0.2101, + "step": 1730 + }, + { + "epoch": 0.005962457080589118, + "grad_norm": 1.1485129594802856, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 1740 + }, + { + "epoch": 0.005996724075305148, + "grad_norm": 1.1813607215881348, + "learning_rate": 1e-05, + "loss": 0.231, + "step": 1750 + }, + { + "epoch": 0.006030991070021177, + "grad_norm": 1.4075005054473877, + "learning_rate": 1e-05, + "loss": 0.2306, + "step": 1760 + }, + { + "epoch": 0.0060652580647372065, + "grad_norm": 1.2183804512023926, + "learning_rate": 1e-05, + "loss": 0.2227, + "step": 1770 + }, + { + "epoch": 0.0060995250594532355, + "grad_norm": 1.3654927015304565, + "learning_rate": 1e-05, + "loss": 0.2341, + "step": 1780 + }, + { + "epoch": 0.006133792054169265, + "grad_norm": 1.2806668281555176, + "learning_rate": 1e-05, + "loss": 0.2226, + "step": 1790 + }, + { + "epoch": 0.006168059048885294, + "grad_norm": 1.2949618101119995, + "learning_rate": 1e-05, + "loss": 0.2698, + "step": 1800 + }, + { + "epoch": 0.006202326043601324, + "grad_norm": 1.3080159425735474, + "learning_rate": 1e-05, + "loss": 0.2691, + "step": 1810 + }, + { + "epoch": 0.006236593038317353, + "grad_norm": 1.1831908226013184, + "learning_rate": 1e-05, + "loss": 0.2644, + "step": 1820 + }, + { + "epoch": 0.006270860033033383, + "grad_norm": 1.1216965913772583, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 1830 + }, + { + "epoch": 0.006305127027749412, + "grad_norm": 1.1943161487579346, + "learning_rate": 1e-05, + "loss": 0.2769, + "step": 1840 + }, + { + "epoch": 0.006339394022465442, + "grad_norm": 1.0856040716171265, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 1850 + }, + { + "epoch": 0.006373661017181471, + "grad_norm": 1.1100040674209595, + "learning_rate": 1e-05, + "loss": 0.2576, + "step": 1860 + }, + { + "epoch": 0.006407928011897501, + "grad_norm": 1.3369051218032837, + "learning_rate": 1e-05, + "loss": 0.2684, + "step": 1870 + }, + { + "epoch": 0.0064421950066135296, + "grad_norm": 1.158797264099121, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 1880 + }, + { + "epoch": 0.006476462001329559, + "grad_norm": 1.1821873188018799, + "learning_rate": 1e-05, + "loss": 0.272, + "step": 1890 + }, + { + "epoch": 0.006510728996045589, + "grad_norm": 1.0739686489105225, + "learning_rate": 1e-05, + "loss": 0.2798, + "step": 1900 + }, + { + "epoch": 0.006544995990761618, + "grad_norm": 1.0639653205871582, + "learning_rate": 1e-05, + "loss": 0.2682, + "step": 1910 + }, + { + "epoch": 0.006579262985477648, + "grad_norm": 1.2149512767791748, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 1920 + }, + { + "epoch": 0.006613529980193677, + "grad_norm": 1.1057014465332031, + "learning_rate": 1e-05, + "loss": 0.2719, + "step": 1930 + }, + { + "epoch": 0.006647796974909707, + "grad_norm": 1.0929185152053833, + "learning_rate": 1e-05, + "loss": 0.2703, + "step": 1940 + }, + { + "epoch": 0.006682063969625736, + "grad_norm": 1.0322917699813843, + "learning_rate": 1e-05, + "loss": 0.2477, + "step": 1950 + }, + { + "epoch": 0.006716330964341766, + "grad_norm": 1.2460272312164307, + "learning_rate": 1e-05, + "loss": 0.2816, + "step": 1960 + }, + { + "epoch": 0.006750597959057795, + "grad_norm": 1.2049859762191772, + "learning_rate": 1e-05, + "loss": 0.2648, + "step": 1970 + }, + { + "epoch": 0.0067848649537738245, + "grad_norm": 1.1182633638381958, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 1980 + }, + { + "epoch": 0.0068191319484898535, + "grad_norm": 1.1514990329742432, + "learning_rate": 1e-05, + "loss": 0.2695, + "step": 1990 + }, + { + "epoch": 0.006853398943205883, + "grad_norm": 1.0150858163833618, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 2000 + }, + { + "epoch": 0.006853398943205883, + "eval_cer": 13.565002645969306, + "eval_loss": 0.2523655593395233, + "eval_normalized_cer": 9.942046362909672, + "eval_runtime": 226.5571, + "eval_samples_per_second": 2.26, + "eval_steps_per_second": 0.035, + "step": 2000 + }, + { + "epoch": 0.006887665937921912, + "grad_norm": 1.0476700067520142, + "learning_rate": 1e-05, + "loss": 0.2555, + "step": 2010 + }, + { + "epoch": 0.006921932932637942, + "grad_norm": 1.1178691387176514, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 2020 + }, + { + "epoch": 0.006956199927353971, + "grad_norm": 1.2596313953399658, + "learning_rate": 1e-05, + "loss": 0.2884, + "step": 2030 + }, + { + "epoch": 0.006990466922070001, + "grad_norm": 1.1929702758789062, + "learning_rate": 1e-05, + "loss": 0.262, + "step": 2040 + }, + { + "epoch": 0.00702473391678603, + "grad_norm": 1.1269497871398926, + "learning_rate": 1e-05, + "loss": 0.2758, + "step": 2050 + }, + { + "epoch": 0.00705900091150206, + "grad_norm": 1.1495511531829834, + "learning_rate": 1e-05, + "loss": 0.2668, + "step": 2060 + }, + { + "epoch": 0.007093267906218089, + "grad_norm": 1.0648061037063599, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 2070 + }, + { + "epoch": 0.007127534900934119, + "grad_norm": 1.3193435668945312, + "learning_rate": 1e-05, + "loss": 0.2743, + "step": 2080 + }, + { + "epoch": 0.0071618018956501476, + "grad_norm": 1.2877907752990723, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 2090 + }, + { + "epoch": 0.007196068890366177, + "grad_norm": 1.2012474536895752, + "learning_rate": 1e-05, + "loss": 0.2662, + "step": 2100 + }, + { + "epoch": 0.007230335885082206, + "grad_norm": 1.1491566896438599, + "learning_rate": 1e-05, + "loss": 0.2666, + "step": 2110 + }, + { + "epoch": 0.007264602879798236, + "grad_norm": 1.1861019134521484, + "learning_rate": 1e-05, + "loss": 0.2618, + "step": 2120 + }, + { + "epoch": 0.007298869874514265, + "grad_norm": 1.123963713645935, + "learning_rate": 1e-05, + "loss": 0.2646, + "step": 2130 + }, + { + "epoch": 0.007333136869230295, + "grad_norm": 1.2697441577911377, + "learning_rate": 1e-05, + "loss": 0.2713, + "step": 2140 + }, + { + "epoch": 0.007367403863946324, + "grad_norm": 0.9741083383560181, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 2150 + }, + { + "epoch": 0.007401670858662354, + "grad_norm": 1.0292670726776123, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 2160 + }, + { + "epoch": 0.007435937853378383, + "grad_norm": 1.0958001613616943, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 2170 + }, + { + "epoch": 0.007470204848094413, + "grad_norm": 1.166869044303894, + "learning_rate": 1e-05, + "loss": 0.2454, + "step": 2180 + }, + { + "epoch": 0.007504471842810442, + "grad_norm": 1.2552424669265747, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 2190 + }, + { + "epoch": 0.0075387388375264715, + "grad_norm": 1.1589868068695068, + "learning_rate": 1e-05, + "loss": 0.2659, + "step": 2200 + }, + { + "epoch": 0.0075730058322425004, + "grad_norm": 1.1640287637710571, + "learning_rate": 1e-05, + "loss": 0.257, + "step": 2210 + }, + { + "epoch": 0.00760727282695853, + "grad_norm": 1.0953587293624878, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 2220 + }, + { + "epoch": 0.007641539821674559, + "grad_norm": 1.2174441814422607, + "learning_rate": 1e-05, + "loss": 0.2626, + "step": 2230 + }, + { + "epoch": 0.007675806816390589, + "grad_norm": 1.1194220781326294, + "learning_rate": 1e-05, + "loss": 0.241, + "step": 2240 + }, + { + "epoch": 0.007710073811106618, + "grad_norm": 1.0677419900894165, + "learning_rate": 1e-05, + "loss": 0.2718, + "step": 2250 + }, + { + "epoch": 0.007744340805822648, + "grad_norm": 1.0956069231033325, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 2260 + }, + { + "epoch": 0.007778607800538677, + "grad_norm": 1.1772819757461548, + "learning_rate": 1e-05, + "loss": 0.2614, + "step": 2270 + }, + { + "epoch": 0.007812874795254707, + "grad_norm": 1.0341110229492188, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 2280 + }, + { + "epoch": 0.007847141789970737, + "grad_norm": 1.174186110496521, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 2290 + }, + { + "epoch": 0.007881408784686765, + "grad_norm": 0.9867792725563049, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 2300 + }, + { + "epoch": 0.007915675779402795, + "grad_norm": 1.1443661451339722, + "learning_rate": 1e-05, + "loss": 0.2331, + "step": 2310 + }, + { + "epoch": 0.007949942774118824, + "grad_norm": 1.117896318435669, + "learning_rate": 1e-05, + "loss": 0.2277, + "step": 2320 + }, + { + "epoch": 0.007984209768834854, + "grad_norm": 1.13510000705719, + "learning_rate": 1e-05, + "loss": 0.2137, + "step": 2330 + }, + { + "epoch": 0.008018476763550882, + "grad_norm": 0.9749162793159485, + "learning_rate": 1e-05, + "loss": 0.2161, + "step": 2340 + }, + { + "epoch": 0.008052743758266912, + "grad_norm": 1.1519534587860107, + "learning_rate": 1e-05, + "loss": 0.2254, + "step": 2350 + }, + { + "epoch": 0.008087010752982942, + "grad_norm": 1.0861778259277344, + "learning_rate": 1e-05, + "loss": 0.2153, + "step": 2360 + }, + { + "epoch": 0.008121277747698972, + "grad_norm": 1.0184444189071655, + "learning_rate": 1e-05, + "loss": 0.2066, + "step": 2370 + }, + { + "epoch": 0.008155544742415, + "grad_norm": 1.0581239461898804, + "learning_rate": 1e-05, + "loss": 0.2243, + "step": 2380 + }, + { + "epoch": 0.00818981173713103, + "grad_norm": 0.9954540729522705, + "learning_rate": 1e-05, + "loss": 0.2171, + "step": 2390 + }, + { + "epoch": 0.00822407873184706, + "grad_norm": 1.121960163116455, + "learning_rate": 1e-05, + "loss": 0.2216, + "step": 2400 + }, + { + "epoch": 0.00825834572656309, + "grad_norm": 1.097725510597229, + "learning_rate": 1e-05, + "loss": 0.2142, + "step": 2410 + }, + { + "epoch": 0.008292612721279118, + "grad_norm": 1.0566459894180298, + "learning_rate": 1e-05, + "loss": 0.2272, + "step": 2420 + }, + { + "epoch": 0.008326879715995147, + "grad_norm": 1.0077927112579346, + "learning_rate": 1e-05, + "loss": 0.211, + "step": 2430 + }, + { + "epoch": 0.008361146710711177, + "grad_norm": 1.176035761833191, + "learning_rate": 1e-05, + "loss": 0.2125, + "step": 2440 + }, + { + "epoch": 0.008395413705427207, + "grad_norm": 1.0064568519592285, + "learning_rate": 1e-05, + "loss": 0.2066, + "step": 2450 + }, + { + "epoch": 0.008429680700143235, + "grad_norm": 1.1852171421051025, + "learning_rate": 1e-05, + "loss": 0.2087, + "step": 2460 + }, + { + "epoch": 0.008463947694859265, + "grad_norm": 0.9580971002578735, + "learning_rate": 1e-05, + "loss": 0.2172, + "step": 2470 + }, + { + "epoch": 0.008498214689575295, + "grad_norm": 1.1230813264846802, + "learning_rate": 1e-05, + "loss": 0.2104, + "step": 2480 + }, + { + "epoch": 0.008532481684291325, + "grad_norm": 1.1891340017318726, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 2490 + }, + { + "epoch": 0.008566748679007353, + "grad_norm": 1.2579045295715332, + "learning_rate": 1e-05, + "loss": 0.2109, + "step": 2500 + }, + { + "epoch": 0.008566748679007353, + "eval_cer": 13.300405715293703, + "eval_loss": 0.26059621572494507, + "eval_normalized_cer": 9.502398081534773, + "eval_runtime": 226.5522, + "eval_samples_per_second": 2.26, + "eval_steps_per_second": 0.035, + "step": 2500 + }, + { + "epoch": 0.008601015673723383, + "grad_norm": 1.0522507429122925, + "learning_rate": 1e-05, + "loss": 0.2154, + "step": 2510 + }, + { + "epoch": 0.008635282668439413, + "grad_norm": 1.0875492095947266, + "learning_rate": 1e-05, + "loss": 0.2251, + "step": 2520 + }, + { + "epoch": 0.008669549663155442, + "grad_norm": 1.0868346691131592, + "learning_rate": 1e-05, + "loss": 0.2086, + "step": 2530 + }, + { + "epoch": 0.008703816657871472, + "grad_norm": 1.0993175506591797, + "learning_rate": 1e-05, + "loss": 0.205, + "step": 2540 + }, + { + "epoch": 0.0087380836525875, + "grad_norm": 1.0495941638946533, + "learning_rate": 1e-05, + "loss": 0.2135, + "step": 2550 + }, + { + "epoch": 0.00877235064730353, + "grad_norm": 1.0326807498931885, + "learning_rate": 1e-05, + "loss": 0.2105, + "step": 2560 + }, + { + "epoch": 0.00880661764201956, + "grad_norm": 1.0804367065429688, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 2570 + }, + { + "epoch": 0.00884088463673559, + "grad_norm": 1.0738023519515991, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 2580 + }, + { + "epoch": 0.008875151631451618, + "grad_norm": 1.1695871353149414, + "learning_rate": 1e-05, + "loss": 0.2518, + "step": 2590 + }, + { + "epoch": 0.008909418626167648, + "grad_norm": 1.155653476715088, + "learning_rate": 1e-05, + "loss": 0.2592, + "step": 2600 + }, + { + "epoch": 0.008943685620883678, + "grad_norm": 1.1516027450561523, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 2610 + }, + { + "epoch": 0.008977952615599707, + "grad_norm": 1.2618260383605957, + "learning_rate": 1e-05, + "loss": 0.2638, + "step": 2620 + }, + { + "epoch": 0.009012219610315736, + "grad_norm": 1.2422987222671509, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 2630 + }, + { + "epoch": 0.009046486605031765, + "grad_norm": 1.1460082530975342, + "learning_rate": 1e-05, + "loss": 0.2509, + "step": 2640 + }, + { + "epoch": 0.009080753599747795, + "grad_norm": 1.2502261400222778, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 2650 + }, + { + "epoch": 0.009115020594463825, + "grad_norm": 1.139840006828308, + "learning_rate": 1e-05, + "loss": 0.255, + "step": 2660 + }, + { + "epoch": 0.009149287589179853, + "grad_norm": 1.3247896432876587, + "learning_rate": 1e-05, + "loss": 0.2721, + "step": 2670 + }, + { + "epoch": 0.009183554583895883, + "grad_norm": 1.1355103254318237, + "learning_rate": 1e-05, + "loss": 0.2604, + "step": 2680 + }, + { + "epoch": 0.009217821578611913, + "grad_norm": 1.106541633605957, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 2690 + }, + { + "epoch": 0.009252088573327943, + "grad_norm": 1.2375975847244263, + "learning_rate": 1e-05, + "loss": 0.2719, + "step": 2700 + }, + { + "epoch": 0.00928635556804397, + "grad_norm": 1.1048275232315063, + "learning_rate": 1e-05, + "loss": 0.2791, + "step": 2710 + }, + { + "epoch": 0.00932062256276, + "grad_norm": 0.9889766573905945, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 2720 + }, + { + "epoch": 0.00935488955747603, + "grad_norm": 1.1566202640533447, + "learning_rate": 1e-05, + "loss": 0.252, + "step": 2730 + }, + { + "epoch": 0.00938915655219206, + "grad_norm": 1.1586074829101562, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 2740 + }, + { + "epoch": 0.009423423546908088, + "grad_norm": 0.990419328212738, + "learning_rate": 1e-05, + "loss": 0.2572, + "step": 2750 + }, + { + "epoch": 0.009457690541624118, + "grad_norm": 1.1101089715957642, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 2760 + }, + { + "epoch": 0.009491957536340148, + "grad_norm": 1.0488269329071045, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 2770 + }, + { + "epoch": 0.009526224531056178, + "grad_norm": 1.1127737760543823, + "learning_rate": 1e-05, + "loss": 0.2578, + "step": 2780 + }, + { + "epoch": 0.009560491525772206, + "grad_norm": 1.2353262901306152, + "learning_rate": 1e-05, + "loss": 0.2412, + "step": 2790 + }, + { + "epoch": 0.009594758520488236, + "grad_norm": 1.1262571811676025, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 2800 + }, + { + "epoch": 0.009629025515204266, + "grad_norm": 1.294323205947876, + "learning_rate": 1e-05, + "loss": 0.2512, + "step": 2810 + }, + { + "epoch": 0.009663292509920296, + "grad_norm": 1.0706703662872314, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 2820 + }, + { + "epoch": 0.009697559504636324, + "grad_norm": 1.0089077949523926, + "learning_rate": 1e-05, + "loss": 0.2522, + "step": 2830 + }, + { + "epoch": 0.009731826499352354, + "grad_norm": 0.9697763323783875, + "learning_rate": 1e-05, + "loss": 0.2684, + "step": 2840 + }, + { + "epoch": 0.009766093494068383, + "grad_norm": 1.1122509241104126, + "learning_rate": 1e-05, + "loss": 0.2629, + "step": 2850 + }, + { + "epoch": 0.009800360488784413, + "grad_norm": 1.0381057262420654, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 2860 + }, + { + "epoch": 0.009834627483500441, + "grad_norm": 1.126947045326233, + "learning_rate": 1e-05, + "loss": 0.2674, + "step": 2870 + }, + { + "epoch": 0.009868894478216471, + "grad_norm": 1.0714973211288452, + "learning_rate": 1e-05, + "loss": 0.2634, + "step": 2880 + }, + { + "epoch": 0.009903161472932501, + "grad_norm": 1.0942039489746094, + "learning_rate": 1e-05, + "loss": 0.2751, + "step": 2890 + }, + { + "epoch": 0.009937428467648531, + "grad_norm": 1.1503955125808716, + "learning_rate": 1e-05, + "loss": 0.272, + "step": 2900 + }, + { + "epoch": 0.009971695462364559, + "grad_norm": 1.1912988424301147, + "learning_rate": 1e-05, + "loss": 0.2645, + "step": 2910 + }, + { + "epoch": 0.010005962457080589, + "grad_norm": 1.0941249132156372, + "learning_rate": 1e-05, + "loss": 0.2531, + "step": 2920 + }, + { + "epoch": 0.010040229451796619, + "grad_norm": 1.2545968294143677, + "learning_rate": 1e-05, + "loss": 0.2562, + "step": 2930 + }, + { + "epoch": 0.010074496446512649, + "grad_norm": 1.3605022430419922, + "learning_rate": 1e-05, + "loss": 0.2601, + "step": 2940 + }, + { + "epoch": 0.010108763441228677, + "grad_norm": 1.0911775827407837, + "learning_rate": 1e-05, + "loss": 0.2605, + "step": 2950 + }, + { + "epoch": 0.010143030435944706, + "grad_norm": 1.133867859840393, + "learning_rate": 1e-05, + "loss": 0.2554, + "step": 2960 + }, + { + "epoch": 0.010177297430660736, + "grad_norm": 1.2511764764785767, + "learning_rate": 1e-05, + "loss": 0.2658, + "step": 2970 + }, + { + "epoch": 0.010211564425376766, + "grad_norm": 1.1705303192138672, + "learning_rate": 1e-05, + "loss": 0.2737, + "step": 2980 + }, + { + "epoch": 0.010245831420092794, + "grad_norm": 1.132071614265442, + "learning_rate": 1e-05, + "loss": 0.2665, + "step": 2990 + }, + { + "epoch": 0.010280098414808824, + "grad_norm": 1.2301791906356812, + "learning_rate": 1e-05, + "loss": 0.2645, + "step": 3000 + }, + { + "epoch": 0.010280098414808824, + "eval_cer": 12.938789910037043, + "eval_loss": 0.2511608302593231, + "eval_normalized_cer": 9.152677857713828, + "eval_runtime": 227.4553, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 0.035, + "step": 3000 + }, + { + "epoch": 0.010314365409524854, + "grad_norm": 1.1527032852172852, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 3010 + }, + { + "epoch": 0.010348632404240884, + "grad_norm": 1.1162952184677124, + "learning_rate": 1e-05, + "loss": 0.2728, + "step": 3020 + }, + { + "epoch": 0.010382899398956912, + "grad_norm": 1.062084436416626, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 3030 + }, + { + "epoch": 0.010417166393672942, + "grad_norm": 1.1536457538604736, + "learning_rate": 1e-05, + "loss": 0.2633, + "step": 3040 + }, + { + "epoch": 0.010451433388388972, + "grad_norm": 1.2096189260482788, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 3050 + }, + { + "epoch": 0.010485700383105001, + "grad_norm": 0.9950299263000488, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 3060 + }, + { + "epoch": 0.01051996737782103, + "grad_norm": 1.0628243684768677, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 3070 + }, + { + "epoch": 0.01055423437253706, + "grad_norm": 1.042555570602417, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 3080 + }, + { + "epoch": 0.01058850136725309, + "grad_norm": 1.22646164894104, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 3090 + }, + { + "epoch": 0.010622768361969119, + "grad_norm": 1.0862691402435303, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 3100 + }, + { + "epoch": 0.010657035356685147, + "grad_norm": 1.148868203163147, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 3110 + }, + { + "epoch": 0.010691302351401177, + "grad_norm": 1.1677169799804688, + "learning_rate": 1e-05, + "loss": 0.2481, + "step": 3120 + }, + { + "epoch": 0.010725569346117207, + "grad_norm": 0.990696132183075, + "learning_rate": 1e-05, + "loss": 0.2421, + "step": 3130 + }, + { + "epoch": 0.010759836340833237, + "grad_norm": 1.2869263887405396, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 3140 + }, + { + "epoch": 0.010794103335549265, + "grad_norm": 1.0741721391677856, + "learning_rate": 1e-05, + "loss": 0.2617, + "step": 3150 + }, + { + "epoch": 0.010828370330265295, + "grad_norm": 1.103102445602417, + "learning_rate": 1e-05, + "loss": 0.2442, + "step": 3160 + }, + { + "epoch": 0.010862637324981324, + "grad_norm": 1.2562378644943237, + "learning_rate": 1e-05, + "loss": 0.2589, + "step": 3170 + }, + { + "epoch": 0.010896904319697354, + "grad_norm": 1.2153191566467285, + "learning_rate": 1e-05, + "loss": 0.2417, + "step": 3180 + }, + { + "epoch": 0.010931171314413384, + "grad_norm": 1.0507330894470215, + "learning_rate": 1e-05, + "loss": 0.2607, + "step": 3190 + }, + { + "epoch": 0.010965438309129412, + "grad_norm": 1.1882787942886353, + "learning_rate": 1e-05, + "loss": 0.2469, + "step": 3200 + }, + { + "epoch": 0.010999705303845442, + "grad_norm": 1.1394702196121216, + "learning_rate": 1e-05, + "loss": 0.2574, + "step": 3210 + }, + { + "epoch": 0.011033972298561472, + "grad_norm": 1.2482614517211914, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 3220 + }, + { + "epoch": 0.011068239293277502, + "grad_norm": 1.0362995862960815, + "learning_rate": 1e-05, + "loss": 0.2589, + "step": 3230 + }, + { + "epoch": 0.01110250628799353, + "grad_norm": 1.1730456352233887, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 3240 + }, + { + "epoch": 0.01113677328270956, + "grad_norm": 1.1563142538070679, + "learning_rate": 1e-05, + "loss": 0.2439, + "step": 3250 + }, + { + "epoch": 0.01117104027742559, + "grad_norm": 1.1030769348144531, + "learning_rate": 1e-05, + "loss": 0.2671, + "step": 3260 + }, + { + "epoch": 0.01120530727214162, + "grad_norm": 1.1719223260879517, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 3270 + }, + { + "epoch": 0.011239574266857648, + "grad_norm": 1.1840440034866333, + "learning_rate": 1e-05, + "loss": 0.2643, + "step": 3280 + }, + { + "epoch": 0.011273841261573677, + "grad_norm": 1.1928170919418335, + "learning_rate": 1e-05, + "loss": 0.2629, + "step": 3290 + }, + { + "epoch": 0.011308108256289707, + "grad_norm": 1.0311812162399292, + "learning_rate": 1e-05, + "loss": 0.2552, + "step": 3300 + }, + { + "epoch": 0.011342375251005737, + "grad_norm": 1.1625889539718628, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 3310 + }, + { + "epoch": 0.011376642245721765, + "grad_norm": 1.0287625789642334, + "learning_rate": 1e-05, + "loss": 0.2341, + "step": 3320 + }, + { + "epoch": 0.011410909240437795, + "grad_norm": 1.1310815811157227, + "learning_rate": 1e-05, + "loss": 0.2554, + "step": 3330 + }, + { + "epoch": 0.011445176235153825, + "grad_norm": 1.1266168355941772, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 3340 + }, + { + "epoch": 0.011479443229869855, + "grad_norm": 1.1979014873504639, + "learning_rate": 1e-05, + "loss": 0.2559, + "step": 3350 + }, + { + "epoch": 0.011513710224585883, + "grad_norm": 1.0378515720367432, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 3360 + }, + { + "epoch": 0.011547977219301913, + "grad_norm": 1.1832512617111206, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 3370 + }, + { + "epoch": 0.011582244214017942, + "grad_norm": 0.9605569839477539, + "learning_rate": 1e-05, + "loss": 0.2349, + "step": 3380 + }, + { + "epoch": 0.011616511208733972, + "grad_norm": 1.0463056564331055, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 3390 + }, + { + "epoch": 0.01165077820345, + "grad_norm": 1.1021932363510132, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 3400 + }, + { + "epoch": 0.01168504519816603, + "grad_norm": 1.040493130683899, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 3410 + }, + { + "epoch": 0.01171931219288206, + "grad_norm": 1.1483063697814941, + "learning_rate": 1e-05, + "loss": 0.2398, + "step": 3420 + }, + { + "epoch": 0.01175357918759809, + "grad_norm": 1.0316531658172607, + "learning_rate": 1e-05, + "loss": 0.2329, + "step": 3430 + }, + { + "epoch": 0.011787846182314118, + "grad_norm": 1.1677886247634888, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 3440 + }, + { + "epoch": 0.011822113177030148, + "grad_norm": 1.2078930139541626, + "learning_rate": 1e-05, + "loss": 0.2337, + "step": 3450 + }, + { + "epoch": 0.011856380171746178, + "grad_norm": 1.178202509880066, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 3460 + }, + { + "epoch": 0.011890647166462208, + "grad_norm": 1.0453248023986816, + "learning_rate": 1e-05, + "loss": 0.2233, + "step": 3470 + }, + { + "epoch": 0.011924914161178236, + "grad_norm": 1.0171067714691162, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 3480 + }, + { + "epoch": 0.011959181155894266, + "grad_norm": 1.051792860031128, + "learning_rate": 1e-05, + "loss": 0.2394, + "step": 3490 + }, + { + "epoch": 0.011993448150610295, + "grad_norm": 1.1237847805023193, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 3500 + }, + { + "epoch": 0.011993448150610295, + "eval_cer": 13.071088375374845, + "eval_loss": 0.25454944372177124, + "eval_normalized_cer": 9.542366107114308, + "eval_runtime": 228.9468, + "eval_samples_per_second": 2.236, + "eval_steps_per_second": 0.035, + "step": 3500 + }, + { + "epoch": 0.012027715145326325, + "grad_norm": 1.1366350650787354, + "learning_rate": 1e-05, + "loss": 0.2353, + "step": 3510 + }, + { + "epoch": 0.012061982140042353, + "grad_norm": 1.136927604675293, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 3520 + }, + { + "epoch": 0.012096249134758383, + "grad_norm": 1.1875656843185425, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 3530 + }, + { + "epoch": 0.012130516129474413, + "grad_norm": 1.2016057968139648, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 3540 + }, + { + "epoch": 0.012164783124190443, + "grad_norm": 1.209622859954834, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 3550 + }, + { + "epoch": 0.012199050118906471, + "grad_norm": 1.0696970224380493, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 3560 + }, + { + "epoch": 0.0122333171136225, + "grad_norm": 1.2674167156219482, + "learning_rate": 1e-05, + "loss": 0.243, + "step": 3570 + }, + { + "epoch": 0.01226758410833853, + "grad_norm": 1.2928141355514526, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 3580 + }, + { + "epoch": 0.01230185110305456, + "grad_norm": 1.0642272233963013, + "learning_rate": 1e-05, + "loss": 0.2356, + "step": 3590 + }, + { + "epoch": 0.012336118097770589, + "grad_norm": 1.0935972929000854, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 3600 + }, + { + "epoch": 0.012370385092486618, + "grad_norm": 1.180668830871582, + "learning_rate": 1e-05, + "loss": 0.2409, + "step": 3610 + }, + { + "epoch": 0.012404652087202648, + "grad_norm": 1.2312487363815308, + "learning_rate": 1e-05, + "loss": 0.2478, + "step": 3620 + }, + { + "epoch": 0.012438919081918678, + "grad_norm": 0.947522759437561, + "learning_rate": 1e-05, + "loss": 0.2281, + "step": 3630 + }, + { + "epoch": 0.012473186076634706, + "grad_norm": 1.0618727207183838, + "learning_rate": 1e-05, + "loss": 0.2423, + "step": 3640 + }, + { + "epoch": 0.012507453071350736, + "grad_norm": 1.0766098499298096, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 3650 + }, + { + "epoch": 0.012541720066066766, + "grad_norm": 1.1174747943878174, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 3660 + }, + { + "epoch": 0.012575987060782796, + "grad_norm": 1.1940118074417114, + "learning_rate": 1e-05, + "loss": 0.2212, + "step": 3670 + }, + { + "epoch": 0.012610254055498824, + "grad_norm": 1.1407246589660645, + "learning_rate": 1e-05, + "loss": 0.2423, + "step": 3680 + }, + { + "epoch": 0.012644521050214854, + "grad_norm": 1.2646050453186035, + "learning_rate": 1e-05, + "loss": 0.2252, + "step": 3690 + }, + { + "epoch": 0.012678788044930884, + "grad_norm": 1.130337119102478, + "learning_rate": 1e-05, + "loss": 0.2131, + "step": 3700 + }, + { + "epoch": 0.012713055039646913, + "grad_norm": 1.1432557106018066, + "learning_rate": 1e-05, + "loss": 0.2386, + "step": 3710 + }, + { + "epoch": 0.012747322034362941, + "grad_norm": 1.1370545625686646, + "learning_rate": 1e-05, + "loss": 0.2347, + "step": 3720 + }, + { + "epoch": 0.012781589029078971, + "grad_norm": 1.3126403093338013, + "learning_rate": 1e-05, + "loss": 0.2159, + "step": 3730 + }, + { + "epoch": 0.012815856023795001, + "grad_norm": 1.2375295162200928, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 3740 + }, + { + "epoch": 0.012850123018511031, + "grad_norm": 1.0877372026443481, + "learning_rate": 1e-05, + "loss": 0.2201, + "step": 3750 + }, + { + "epoch": 0.012884390013227059, + "grad_norm": 1.1122978925704956, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 3760 + }, + { + "epoch": 0.012918657007943089, + "grad_norm": 1.0270159244537354, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 3770 + }, + { + "epoch": 0.012952924002659119, + "grad_norm": 1.1370947360992432, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 3780 + }, + { + "epoch": 0.012987190997375149, + "grad_norm": 1.2888813018798828, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 3790 + }, + { + "epoch": 0.013021457992091178, + "grad_norm": 1.2443634271621704, + "learning_rate": 1e-05, + "loss": 0.2218, + "step": 3800 + }, + { + "epoch": 0.013055724986807207, + "grad_norm": 1.1919447183609009, + "learning_rate": 1e-05, + "loss": 0.2277, + "step": 3810 + }, + { + "epoch": 0.013089991981523236, + "grad_norm": 1.140600562095642, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 3820 + }, + { + "epoch": 0.013124258976239266, + "grad_norm": 1.074697494506836, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 3830 + }, + { + "epoch": 0.013158525970955296, + "grad_norm": 1.1003391742706299, + "learning_rate": 1e-05, + "loss": 0.2217, + "step": 3840 + }, + { + "epoch": 0.013192792965671324, + "grad_norm": 1.1427338123321533, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 3850 + }, + { + "epoch": 0.013227059960387354, + "grad_norm": 1.0806514024734497, + "learning_rate": 1e-05, + "loss": 0.2332, + "step": 3860 + }, + { + "epoch": 0.013261326955103384, + "grad_norm": 1.1547067165374756, + "learning_rate": 1e-05, + "loss": 0.2306, + "step": 3870 + }, + { + "epoch": 0.013295593949819414, + "grad_norm": 1.2483099699020386, + "learning_rate": 1e-05, + "loss": 0.2166, + "step": 3880 + }, + { + "epoch": 0.013329860944535442, + "grad_norm": 1.096939206123352, + "learning_rate": 1e-05, + "loss": 0.2253, + "step": 3890 + }, + { + "epoch": 0.013364127939251472, + "grad_norm": 1.1876115798950195, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 3900 + }, + { + "epoch": 0.013398394933967502, + "grad_norm": 1.1380902528762817, + "learning_rate": 1e-05, + "loss": 0.2256, + "step": 3910 + }, + { + "epoch": 0.013432661928683531, + "grad_norm": 1.0738089084625244, + "learning_rate": 1e-05, + "loss": 0.2307, + "step": 3920 + }, + { + "epoch": 0.01346692892339956, + "grad_norm": 1.0351170301437378, + "learning_rate": 1e-05, + "loss": 0.2296, + "step": 3930 + }, + { + "epoch": 0.01350119591811559, + "grad_norm": 1.2752678394317627, + "learning_rate": 1e-05, + "loss": 0.2462, + "step": 3940 + }, + { + "epoch": 0.01353546291283162, + "grad_norm": 1.2618532180786133, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 3950 + }, + { + "epoch": 0.013569729907547649, + "grad_norm": 1.1907076835632324, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 3960 + }, + { + "epoch": 0.013603996902263677, + "grad_norm": 0.9435076117515564, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 3970 + }, + { + "epoch": 0.013638263896979707, + "grad_norm": 1.0608407258987427, + "learning_rate": 1e-05, + "loss": 0.2241, + "step": 3980 + }, + { + "epoch": 0.013672530891695737, + "grad_norm": 1.0729584693908691, + "learning_rate": 1e-05, + "loss": 0.2237, + "step": 3990 + }, + { + "epoch": 0.013706797886411767, + "grad_norm": 1.2006182670593262, + "learning_rate": 1e-05, + "loss": 0.2386, + "step": 4000 + }, + { + "epoch": 0.013706797886411767, + "eval_cer": 12.594813900158758, + "eval_loss": 0.25156331062316895, + "eval_normalized_cer": 8.912869704236611, + "eval_runtime": 228.7977, + "eval_samples_per_second": 2.238, + "eval_steps_per_second": 0.035, + "step": 4000 + }, + { + "epoch": 0.013741064881127795, + "grad_norm": 1.2020457983016968, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 4010 + }, + { + "epoch": 0.013775331875843825, + "grad_norm": 1.0251790285110474, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 4020 + }, + { + "epoch": 0.013809598870559854, + "grad_norm": 1.160437822341919, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 4030 + }, + { + "epoch": 0.013843865865275884, + "grad_norm": 1.025770664215088, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 4040 + }, + { + "epoch": 0.013878132859991912, + "grad_norm": 1.111954689025879, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 4050 + }, + { + "epoch": 0.013912399854707942, + "grad_norm": 1.0644809007644653, + "learning_rate": 1e-05, + "loss": 0.2195, + "step": 4060 + }, + { + "epoch": 0.013946666849423972, + "grad_norm": 1.2926712036132812, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 4070 + }, + { + "epoch": 0.013980933844140002, + "grad_norm": 1.2169601917266846, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 4080 + }, + { + "epoch": 0.01401520083885603, + "grad_norm": 1.1396681070327759, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 4090 + }, + { + "epoch": 0.01404946783357206, + "grad_norm": 1.2242721319198608, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 4100 + }, + { + "epoch": 0.01408373482828809, + "grad_norm": 1.195324420928955, + "learning_rate": 1e-05, + "loss": 0.2368, + "step": 4110 + }, + { + "epoch": 0.01411800182300412, + "grad_norm": 1.2345412969589233, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 4120 + }, + { + "epoch": 0.014152268817720148, + "grad_norm": 1.1502156257629395, + "learning_rate": 1e-05, + "loss": 0.2327, + "step": 4130 + }, + { + "epoch": 0.014186535812436177, + "grad_norm": 1.2128121852874756, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 4140 + }, + { + "epoch": 0.014220802807152207, + "grad_norm": 1.2618858814239502, + "learning_rate": 1e-05, + "loss": 0.231, + "step": 4150 + }, + { + "epoch": 0.014255069801868237, + "grad_norm": 1.0879299640655518, + "learning_rate": 1e-05, + "loss": 0.2302, + "step": 4160 + }, + { + "epoch": 0.014289336796584265, + "grad_norm": 0.9794358015060425, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 4170 + }, + { + "epoch": 0.014323603791300295, + "grad_norm": 1.1454006433486938, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 4180 + }, + { + "epoch": 0.014357870786016325, + "grad_norm": 1.223686933517456, + "learning_rate": 1e-05, + "loss": 0.2211, + "step": 4190 + }, + { + "epoch": 0.014392137780732355, + "grad_norm": 1.1423155069351196, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 4200 + }, + { + "epoch": 0.014426404775448383, + "grad_norm": 1.1027394533157349, + "learning_rate": 1e-05, + "loss": 0.2279, + "step": 4210 + }, + { + "epoch": 0.014460671770164413, + "grad_norm": 1.1777397394180298, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 4220 + }, + { + "epoch": 0.014494938764880443, + "grad_norm": 1.01688551902771, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 4230 + }, + { + "epoch": 0.014529205759596472, + "grad_norm": 1.1520488262176514, + "learning_rate": 1e-05, + "loss": 0.2301, + "step": 4240 + }, + { + "epoch": 0.0145634727543125, + "grad_norm": 1.2820484638214111, + "learning_rate": 1e-05, + "loss": 0.2205, + "step": 4250 + }, + { + "epoch": 0.01459773974902853, + "grad_norm": 1.169291377067566, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 4260 + }, + { + "epoch": 0.01463200674374456, + "grad_norm": 1.1135886907577515, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 4270 + }, + { + "epoch": 0.01466627373846059, + "grad_norm": 1.0846205949783325, + "learning_rate": 1e-05, + "loss": 0.223, + "step": 4280 + }, + { + "epoch": 0.014700540733176618, + "grad_norm": 0.981488049030304, + "learning_rate": 1e-05, + "loss": 0.2092, + "step": 4290 + }, + { + "epoch": 0.014734807727892648, + "grad_norm": 1.0437407493591309, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 4300 + }, + { + "epoch": 0.014769074722608678, + "grad_norm": 1.005792260169983, + "learning_rate": 1e-05, + "loss": 0.2286, + "step": 4310 + }, + { + "epoch": 0.014803341717324708, + "grad_norm": 1.1903142929077148, + "learning_rate": 1e-05, + "loss": 0.231, + "step": 4320 + }, + { + "epoch": 0.014837608712040736, + "grad_norm": 1.1308993101119995, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 4330 + }, + { + "epoch": 0.014871875706756766, + "grad_norm": 1.0948210954666138, + "learning_rate": 1e-05, + "loss": 0.213, + "step": 4340 + }, + { + "epoch": 0.014906142701472795, + "grad_norm": 1.2674663066864014, + "learning_rate": 1e-05, + "loss": 0.2432, + "step": 4350 + }, + { + "epoch": 0.014940409696188825, + "grad_norm": 1.4228485822677612, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 4360 + }, + { + "epoch": 0.014974676690904853, + "grad_norm": 1.1533160209655762, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 4370 + }, + { + "epoch": 0.015008943685620883, + "grad_norm": 1.1454424858093262, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 4380 + }, + { + "epoch": 0.015043210680336913, + "grad_norm": 1.2944281101226807, + "learning_rate": 1e-05, + "loss": 0.2651, + "step": 4390 + }, + { + "epoch": 0.015077477675052943, + "grad_norm": 1.2148584127426147, + "learning_rate": 1e-05, + "loss": 0.2694, + "step": 4400 + }, + { + "epoch": 0.015111744669768971, + "grad_norm": 1.091282844543457, + "learning_rate": 1e-05, + "loss": 0.2672, + "step": 4410 + }, + { + "epoch": 0.015146011664485001, + "grad_norm": 1.2254445552825928, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 4420 + }, + { + "epoch": 0.01518027865920103, + "grad_norm": 1.367516279220581, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 4430 + }, + { + "epoch": 0.01521454565391706, + "grad_norm": 1.1858383417129517, + "learning_rate": 1e-05, + "loss": 0.2764, + "step": 4440 + }, + { + "epoch": 0.01524881264863309, + "grad_norm": 1.1331857442855835, + "learning_rate": 1e-05, + "loss": 0.2577, + "step": 4450 + }, + { + "epoch": 0.015283079643349119, + "grad_norm": 1.2343239784240723, + "learning_rate": 1e-05, + "loss": 0.2661, + "step": 4460 + }, + { + "epoch": 0.015317346638065148, + "grad_norm": 1.0893656015396118, + "learning_rate": 1e-05, + "loss": 0.2538, + "step": 4470 + }, + { + "epoch": 0.015351613632781178, + "grad_norm": 1.1467857360839844, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 4480 + }, + { + "epoch": 0.015385880627497208, + "grad_norm": 1.2753335237503052, + "learning_rate": 1e-05, + "loss": 0.2797, + "step": 4490 + }, + { + "epoch": 0.015420147622213236, + "grad_norm": 1.1355762481689453, + "learning_rate": 1e-05, + "loss": 0.2672, + "step": 4500 + }, + { + "epoch": 0.015420147622213236, + "eval_cer": 13.159287352266713, + "eval_loss": 0.24996142089366913, + "eval_normalized_cer": 9.59232613908873, + "eval_runtime": 228.0477, + "eval_samples_per_second": 2.245, + "eval_steps_per_second": 0.035, + "step": 4500 + }, + { + "epoch": 0.015454414616929266, + "grad_norm": 1.2256762981414795, + "learning_rate": 1e-05, + "loss": 0.2662, + "step": 4510 + }, + { + "epoch": 0.015488681611645296, + "grad_norm": 1.0631389617919922, + "learning_rate": 1e-05, + "loss": 0.2596, + "step": 4520 + }, + { + "epoch": 0.015522948606361326, + "grad_norm": 1.0759390592575073, + "learning_rate": 1e-05, + "loss": 0.2553, + "step": 4530 + }, + { + "epoch": 0.015557215601077354, + "grad_norm": 1.1867231130599976, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 4540 + }, + { + "epoch": 0.015591482595793384, + "grad_norm": 1.1203633546829224, + "learning_rate": 1e-05, + "loss": 0.2732, + "step": 4550 + }, + { + "epoch": 0.015625749590509413, + "grad_norm": 1.1223920583724976, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 4560 + }, + { + "epoch": 0.015660016585225443, + "grad_norm": 1.066497564315796, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 4570 + }, + { + "epoch": 0.015694283579941473, + "grad_norm": 1.2520133256912231, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 4580 + }, + { + "epoch": 0.015728550574657503, + "grad_norm": 1.3602423667907715, + "learning_rate": 1e-05, + "loss": 0.2698, + "step": 4590 + }, + { + "epoch": 0.01576281756937353, + "grad_norm": 1.1748729944229126, + "learning_rate": 1e-05, + "loss": 0.2621, + "step": 4600 + }, + { + "epoch": 0.01579708456408956, + "grad_norm": 0.9431802034378052, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 4610 + }, + { + "epoch": 0.01583135155880559, + "grad_norm": 1.0146753787994385, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 4620 + }, + { + "epoch": 0.01586561855352162, + "grad_norm": 1.1340891122817993, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 4630 + }, + { + "epoch": 0.01589988554823765, + "grad_norm": 1.1456454992294312, + "learning_rate": 1e-05, + "loss": 0.2307, + "step": 4640 + }, + { + "epoch": 0.01593415254295368, + "grad_norm": 1.1026827096939087, + "learning_rate": 1e-05, + "loss": 0.2295, + "step": 4650 + }, + { + "epoch": 0.01596841953766971, + "grad_norm": 1.2215088605880737, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 4660 + }, + { + "epoch": 0.01600268653238574, + "grad_norm": 1.1760615110397339, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 4670 + }, + { + "epoch": 0.016036953527101765, + "grad_norm": 1.1690876483917236, + "learning_rate": 1e-05, + "loss": 0.2282, + "step": 4680 + }, + { + "epoch": 0.016071220521817794, + "grad_norm": 1.182026743888855, + "learning_rate": 1e-05, + "loss": 0.2351, + "step": 4690 + }, + { + "epoch": 0.016105487516533824, + "grad_norm": 1.0182474851608276, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 4700 + }, + { + "epoch": 0.016139754511249854, + "grad_norm": 1.2531431913375854, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 4710 + }, + { + "epoch": 0.016174021505965884, + "grad_norm": 0.9633692502975464, + "learning_rate": 1e-05, + "loss": 0.2297, + "step": 4720 + }, + { + "epoch": 0.016208288500681914, + "grad_norm": 1.1144667863845825, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 4730 + }, + { + "epoch": 0.016242555495397944, + "grad_norm": 1.0768555402755737, + "learning_rate": 1e-05, + "loss": 0.2216, + "step": 4740 + }, + { + "epoch": 0.016276822490113974, + "grad_norm": 1.2052035331726074, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 4750 + }, + { + "epoch": 0.01631108948483, + "grad_norm": 1.0291496515274048, + "learning_rate": 1e-05, + "loss": 0.2226, + "step": 4760 + }, + { + "epoch": 0.01634535647954603, + "grad_norm": 1.2100346088409424, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 4770 + }, + { + "epoch": 0.01637962347426206, + "grad_norm": 1.214861273765564, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 4780 + }, + { + "epoch": 0.01641389046897809, + "grad_norm": 1.137210726737976, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 4790 + }, + { + "epoch": 0.01644815746369412, + "grad_norm": 1.046673059463501, + "learning_rate": 1e-05, + "loss": 0.2231, + "step": 4800 + }, + { + "epoch": 0.01648242445841015, + "grad_norm": 1.08164644241333, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 4810 + }, + { + "epoch": 0.01651669145312618, + "grad_norm": 1.1432491540908813, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 4820 + }, + { + "epoch": 0.01655095844784221, + "grad_norm": 1.1684173345565796, + "learning_rate": 1e-05, + "loss": 0.218, + "step": 4830 + }, + { + "epoch": 0.016585225442558235, + "grad_norm": 1.0895615816116333, + "learning_rate": 1e-05, + "loss": 0.2109, + "step": 4840 + }, + { + "epoch": 0.016619492437274265, + "grad_norm": 1.1505770683288574, + "learning_rate": 1e-05, + "loss": 0.2283, + "step": 4850 + }, + { + "epoch": 0.016653759431990295, + "grad_norm": 1.3385730981826782, + "learning_rate": 1e-05, + "loss": 0.2344, + "step": 4860 + }, + { + "epoch": 0.016688026426706325, + "grad_norm": 1.109035611152649, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 4870 + }, + { + "epoch": 0.016722293421422355, + "grad_norm": 1.1834880113601685, + "learning_rate": 1e-05, + "loss": 0.2247, + "step": 4880 + }, + { + "epoch": 0.016756560416138384, + "grad_norm": 1.2369152307510376, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 4890 + }, + { + "epoch": 0.016790827410854414, + "grad_norm": 1.131173014640808, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 4900 + }, + { + "epoch": 0.016825094405570444, + "grad_norm": 1.1100351810455322, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 4910 + }, + { + "epoch": 0.01685936140028647, + "grad_norm": 1.1857340335845947, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 4920 + }, + { + "epoch": 0.0168936283950025, + "grad_norm": 1.1568819284439087, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 4930 + }, + { + "epoch": 0.01692789538971853, + "grad_norm": 1.104872465133667, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 4940 + }, + { + "epoch": 0.01696216238443456, + "grad_norm": 1.0907660722732544, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 4950 + }, + { + "epoch": 0.01699642937915059, + "grad_norm": 1.1100903749465942, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 4960 + }, + { + "epoch": 0.01703069637386662, + "grad_norm": 1.141200065612793, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 4970 + }, + { + "epoch": 0.01706496336858265, + "grad_norm": 1.2853361368179321, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 4980 + }, + { + "epoch": 0.01709923036329868, + "grad_norm": 1.1542645692825317, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 4990 + }, + { + "epoch": 0.017133497358014706, + "grad_norm": 1.2022640705108643, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 5000 + }, + { + "epoch": 0.017133497358014706, + "eval_cer": 12.92115011465867, + "eval_loss": 0.2521001100540161, + "eval_normalized_cer": 9.30255795363709, + "eval_runtime": 227.4868, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 0.035, + "step": 5000 + }, + { + "epoch": 0.017167764352730736, + "grad_norm": 1.0765001773834229, + "learning_rate": 1e-05, + "loss": 0.2455, + "step": 5010 + }, + { + "epoch": 0.017202031347446765, + "grad_norm": 1.0711493492126465, + "learning_rate": 1e-05, + "loss": 0.2422, + "step": 5020 + }, + { + "epoch": 0.017236298342162795, + "grad_norm": 1.0719484090805054, + "learning_rate": 1e-05, + "loss": 0.2531, + "step": 5030 + }, + { + "epoch": 0.017270565336878825, + "grad_norm": 1.1884721517562866, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 5040 + }, + { + "epoch": 0.017304832331594855, + "grad_norm": 1.068827509880066, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 5050 + }, + { + "epoch": 0.017339099326310885, + "grad_norm": 1.1308655738830566, + "learning_rate": 1e-05, + "loss": 0.2627, + "step": 5060 + }, + { + "epoch": 0.017373366321026915, + "grad_norm": 1.1527314186096191, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 5070 + }, + { + "epoch": 0.017407633315742944, + "grad_norm": 1.1800657510757446, + "learning_rate": 1e-05, + "loss": 0.2587, + "step": 5080 + }, + { + "epoch": 0.01744190031045897, + "grad_norm": 1.095189094543457, + "learning_rate": 1e-05, + "loss": 0.2424, + "step": 5090 + }, + { + "epoch": 0.017476167305175, + "grad_norm": 1.109617829322815, + "learning_rate": 1e-05, + "loss": 0.2543, + "step": 5100 + }, + { + "epoch": 0.01751043429989103, + "grad_norm": 1.2110544443130493, + "learning_rate": 1e-05, + "loss": 0.2687, + "step": 5110 + }, + { + "epoch": 0.01754470129460706, + "grad_norm": 1.0466723442077637, + "learning_rate": 1e-05, + "loss": 0.2424, + "step": 5120 + }, + { + "epoch": 0.01757896828932309, + "grad_norm": 1.2060648202896118, + "learning_rate": 1e-05, + "loss": 0.2337, + "step": 5130 + }, + { + "epoch": 0.01761323528403912, + "grad_norm": 1.203142762184143, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 5140 + }, + { + "epoch": 0.01764750227875515, + "grad_norm": 1.0751283168792725, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 5150 + }, + { + "epoch": 0.01768176927347118, + "grad_norm": 1.1377781629562378, + "learning_rate": 1e-05, + "loss": 0.2448, + "step": 5160 + }, + { + "epoch": 0.017716036268187206, + "grad_norm": 1.147454023361206, + "learning_rate": 1e-05, + "loss": 0.2172, + "step": 5170 + }, + { + "epoch": 0.017750303262903236, + "grad_norm": 1.129897952079773, + "learning_rate": 1e-05, + "loss": 0.2418, + "step": 5180 + }, + { + "epoch": 0.017784570257619266, + "grad_norm": 1.1261131763458252, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 5190 + }, + { + "epoch": 0.017818837252335296, + "grad_norm": 1.0794824361801147, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 5200 + }, + { + "epoch": 0.017853104247051325, + "grad_norm": 1.1870142221450806, + "learning_rate": 1e-05, + "loss": 0.249, + "step": 5210 + }, + { + "epoch": 0.017887371241767355, + "grad_norm": 1.0414400100708008, + "learning_rate": 1e-05, + "loss": 0.2285, + "step": 5220 + }, + { + "epoch": 0.017921638236483385, + "grad_norm": 1.173405647277832, + "learning_rate": 1e-05, + "loss": 0.2529, + "step": 5230 + }, + { + "epoch": 0.017955905231199415, + "grad_norm": 1.039650797843933, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 5240 + }, + { + "epoch": 0.01799017222591544, + "grad_norm": 1.0359266996383667, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 5250 + }, + { + "epoch": 0.01802443922063147, + "grad_norm": 1.0630840063095093, + "learning_rate": 1e-05, + "loss": 0.2117, + "step": 5260 + }, + { + "epoch": 0.0180587062153475, + "grad_norm": 1.0937180519104004, + "learning_rate": 1e-05, + "loss": 0.2454, + "step": 5270 + }, + { + "epoch": 0.01809297321006353, + "grad_norm": 1.1015993356704712, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 5280 + }, + { + "epoch": 0.01812724020477956, + "grad_norm": 1.060584545135498, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 5290 + }, + { + "epoch": 0.01816150719949559, + "grad_norm": 1.1389795541763306, + "learning_rate": 1e-05, + "loss": 0.233, + "step": 5300 + }, + { + "epoch": 0.01819577419421162, + "grad_norm": 1.0018917322158813, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 5310 + }, + { + "epoch": 0.01823004118892765, + "grad_norm": 1.0546092987060547, + "learning_rate": 1e-05, + "loss": 0.2333, + "step": 5320 + }, + { + "epoch": 0.018264308183643677, + "grad_norm": 1.1121848821640015, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 5330 + }, + { + "epoch": 0.018298575178359706, + "grad_norm": 1.1613191366195679, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 5340 + }, + { + "epoch": 0.018332842173075736, + "grad_norm": 1.1250524520874023, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 5350 + }, + { + "epoch": 0.018367109167791766, + "grad_norm": 1.0905226469039917, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 5360 + }, + { + "epoch": 0.018401376162507796, + "grad_norm": 0.9885173439979553, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 5370 + }, + { + "epoch": 0.018435643157223826, + "grad_norm": 1.288758635520935, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 5380 + }, + { + "epoch": 0.018469910151939856, + "grad_norm": 1.2433462142944336, + "learning_rate": 1e-05, + "loss": 0.2427, + "step": 5390 + }, + { + "epoch": 0.018504177146655885, + "grad_norm": 1.2367336750030518, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 5400 + }, + { + "epoch": 0.018538444141371912, + "grad_norm": 1.1871395111083984, + "learning_rate": 1e-05, + "loss": 0.2276, + "step": 5410 + }, + { + "epoch": 0.01857271113608794, + "grad_norm": 0.9569379091262817, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 5420 + }, + { + "epoch": 0.01860697813080397, + "grad_norm": 1.1487014293670654, + "learning_rate": 1e-05, + "loss": 0.2295, + "step": 5430 + }, + { + "epoch": 0.01864124512552, + "grad_norm": 1.0800844430923462, + "learning_rate": 1e-05, + "loss": 0.2247, + "step": 5440 + }, + { + "epoch": 0.01867551212023603, + "grad_norm": 1.1834380626678467, + "learning_rate": 1e-05, + "loss": 0.226, + "step": 5450 + }, + { + "epoch": 0.01870977911495206, + "grad_norm": 1.0035191774368286, + "learning_rate": 1e-05, + "loss": 0.2414, + "step": 5460 + }, + { + "epoch": 0.01874404610966809, + "grad_norm": 1.0685466527938843, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 5470 + }, + { + "epoch": 0.01877831310438412, + "grad_norm": 1.1921565532684326, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 5480 + }, + { + "epoch": 0.018812580099100147, + "grad_norm": 1.1201281547546387, + "learning_rate": 1e-05, + "loss": 0.255, + "step": 5490 + }, + { + "epoch": 0.018846847093816177, + "grad_norm": 1.1162866353988647, + "learning_rate": 1e-05, + "loss": 0.2426, + "step": 5500 + }, + { + "epoch": 0.018846847093816177, + "eval_cer": 13.238666431469396, + "eval_loss": 0.25262224674224854, + "eval_normalized_cer": 9.562350119904076, + "eval_runtime": 229.0802, + "eval_samples_per_second": 2.235, + "eval_steps_per_second": 0.035, + "step": 5500 + }, + { + "epoch": 0.018881114088532207, + "grad_norm": 1.0215845108032227, + "learning_rate": 1e-05, + "loss": 0.2368, + "step": 5510 + }, + { + "epoch": 0.018915381083248237, + "grad_norm": 1.0062447786331177, + "learning_rate": 1e-05, + "loss": 0.2308, + "step": 5520 + }, + { + "epoch": 0.018949648077964266, + "grad_norm": 1.223649024963379, + "learning_rate": 1e-05, + "loss": 0.2409, + "step": 5530 + }, + { + "epoch": 0.018983915072680296, + "grad_norm": 1.2076172828674316, + "learning_rate": 1e-05, + "loss": 0.2236, + "step": 5540 + }, + { + "epoch": 0.019018182067396326, + "grad_norm": 1.154416561126709, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 5550 + }, + { + "epoch": 0.019052449062112356, + "grad_norm": 1.284858226776123, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 5560 + }, + { + "epoch": 0.019086716056828382, + "grad_norm": 1.0406948328018188, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 5570 + }, + { + "epoch": 0.019120983051544412, + "grad_norm": 1.1980571746826172, + "learning_rate": 1e-05, + "loss": 0.2274, + "step": 5580 + }, + { + "epoch": 0.019155250046260442, + "grad_norm": 1.073560357093811, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 5590 + }, + { + "epoch": 0.019189517040976472, + "grad_norm": 1.0982617139816284, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 5600 + }, + { + "epoch": 0.019223784035692502, + "grad_norm": 1.015085220336914, + "learning_rate": 1e-05, + "loss": 0.2269, + "step": 5610 + }, + { + "epoch": 0.01925805103040853, + "grad_norm": 1.238585352897644, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 5620 + }, + { + "epoch": 0.01929231802512456, + "grad_norm": 1.3326079845428467, + "learning_rate": 1e-05, + "loss": 0.25, + "step": 5630 + }, + { + "epoch": 0.01932658501984059, + "grad_norm": 1.1263608932495117, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 5640 + }, + { + "epoch": 0.019360852014556618, + "grad_norm": 1.083595633506775, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 5650 + }, + { + "epoch": 0.019395119009272647, + "grad_norm": 1.0787022113800049, + "learning_rate": 1e-05, + "loss": 0.2248, + "step": 5660 + }, + { + "epoch": 0.019429386003988677, + "grad_norm": 1.312565803527832, + "learning_rate": 1e-05, + "loss": 0.263, + "step": 5670 + }, + { + "epoch": 0.019463652998704707, + "grad_norm": 1.0305407047271729, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 5680 + }, + { + "epoch": 0.019497919993420737, + "grad_norm": 1.0905306339263916, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 5690 + }, + { + "epoch": 0.019532186988136767, + "grad_norm": 1.1105730533599854, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 5700 + }, + { + "epoch": 0.019566453982852797, + "grad_norm": 1.1664555072784424, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 5710 + }, + { + "epoch": 0.019600720977568827, + "grad_norm": 1.0702719688415527, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 5720 + }, + { + "epoch": 0.019634987972284856, + "grad_norm": 1.0736626386642456, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 5730 + }, + { + "epoch": 0.019669254967000883, + "grad_norm": 1.0510461330413818, + "learning_rate": 1e-05, + "loss": 0.2335, + "step": 5740 + }, + { + "epoch": 0.019703521961716913, + "grad_norm": 1.0435370206832886, + "learning_rate": 1e-05, + "loss": 0.2211, + "step": 5750 + }, + { + "epoch": 0.019737788956432942, + "grad_norm": 1.2461049556732178, + "learning_rate": 1e-05, + "loss": 0.2188, + "step": 5760 + }, + { + "epoch": 0.019772055951148972, + "grad_norm": 1.0351046323776245, + "learning_rate": 1e-05, + "loss": 0.2269, + "step": 5770 + }, + { + "epoch": 0.019806322945865002, + "grad_norm": 1.124671459197998, + "learning_rate": 1e-05, + "loss": 0.2284, + "step": 5780 + }, + { + "epoch": 0.019840589940581032, + "grad_norm": 1.145488977432251, + "learning_rate": 1e-05, + "loss": 0.2415, + "step": 5790 + }, + { + "epoch": 0.019874856935297062, + "grad_norm": 1.1410046815872192, + "learning_rate": 1e-05, + "loss": 0.2296, + "step": 5800 + }, + { + "epoch": 0.01990912393001309, + "grad_norm": 1.2782517671585083, + "learning_rate": 1e-05, + "loss": 0.2367, + "step": 5810 + }, + { + "epoch": 0.019943390924729118, + "grad_norm": 1.204562783241272, + "learning_rate": 1e-05, + "loss": 0.2289, + "step": 5820 + }, + { + "epoch": 0.019977657919445148, + "grad_norm": 1.1141811609268188, + "learning_rate": 1e-05, + "loss": 0.2223, + "step": 5830 + }, + { + "epoch": 0.020011924914161178, + "grad_norm": 1.1790316104888916, + "learning_rate": 1e-05, + "loss": 0.2308, + "step": 5840 + }, + { + "epoch": 0.020046191908877208, + "grad_norm": 1.0944266319274902, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 5850 + }, + { + "epoch": 0.020080458903593237, + "grad_norm": 1.0892263650894165, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 5860 + }, + { + "epoch": 0.020114725898309267, + "grad_norm": 1.1419873237609863, + "learning_rate": 1e-05, + "loss": 0.2414, + "step": 5870 + }, + { + "epoch": 0.020148992893025297, + "grad_norm": 1.2230783700942993, + "learning_rate": 1e-05, + "loss": 0.2394, + "step": 5880 + }, + { + "epoch": 0.020183259887741327, + "grad_norm": 1.1309173107147217, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 5890 + }, + { + "epoch": 0.020217526882457353, + "grad_norm": 1.2405802011489868, + "learning_rate": 1e-05, + "loss": 0.259, + "step": 5900 + }, + { + "epoch": 0.020251793877173383, + "grad_norm": 1.2853388786315918, + "learning_rate": 1e-05, + "loss": 0.2668, + "step": 5910 + }, + { + "epoch": 0.020286060871889413, + "grad_norm": 1.299046277999878, + "learning_rate": 1e-05, + "loss": 0.251, + "step": 5920 + }, + { + "epoch": 0.020320327866605443, + "grad_norm": 1.142052173614502, + "learning_rate": 1e-05, + "loss": 0.2655, + "step": 5930 + }, + { + "epoch": 0.020354594861321473, + "grad_norm": 1.3770766258239746, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 5940 + }, + { + "epoch": 0.020388861856037502, + "grad_norm": 1.1458237171173096, + "learning_rate": 1e-05, + "loss": 0.2742, + "step": 5950 + }, + { + "epoch": 0.020423128850753532, + "grad_norm": 1.3130786418914795, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 5960 + }, + { + "epoch": 0.020457395845469562, + "grad_norm": 1.2816088199615479, + "learning_rate": 1e-05, + "loss": 0.2593, + "step": 5970 + }, + { + "epoch": 0.02049166284018559, + "grad_norm": 1.0405460596084595, + "learning_rate": 1e-05, + "loss": 0.2608, + "step": 5980 + }, + { + "epoch": 0.02052592983490162, + "grad_norm": 1.2035329341888428, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 5990 + }, + { + "epoch": 0.020560196829617648, + "grad_norm": 1.0495450496673584, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 6000 + }, + { + "epoch": 0.020560196829617648, + "eval_cer": 13.079908273064033, + "eval_loss": 0.2540421485900879, + "eval_normalized_cer": 9.292565947242206, + "eval_runtime": 227.4153, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 0.035, + "step": 6000 + }, + { + "epoch": 0.020594463824333678, + "grad_norm": 1.1614056825637817, + "learning_rate": 1e-05, + "loss": 0.2527, + "step": 6010 + }, + { + "epoch": 0.020628730819049708, + "grad_norm": 1.1835705041885376, + "learning_rate": 1e-05, + "loss": 0.2592, + "step": 6020 + }, + { + "epoch": 0.020662997813765738, + "grad_norm": 1.1335136890411377, + "learning_rate": 1e-05, + "loss": 0.2727, + "step": 6030 + }, + { + "epoch": 0.020697264808481768, + "grad_norm": 1.052079439163208, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 6040 + }, + { + "epoch": 0.020731531803197797, + "grad_norm": 1.096330165863037, + "learning_rate": 1e-05, + "loss": 0.2684, + "step": 6050 + }, + { + "epoch": 0.020765798797913824, + "grad_norm": 1.2359880208969116, + "learning_rate": 1e-05, + "loss": 0.2638, + "step": 6060 + }, + { + "epoch": 0.020800065792629854, + "grad_norm": 1.2259430885314941, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 6070 + }, + { + "epoch": 0.020834332787345883, + "grad_norm": 1.0531619787216187, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 6080 + }, + { + "epoch": 0.020868599782061913, + "grad_norm": 1.1754058599472046, + "learning_rate": 1e-05, + "loss": 0.254, + "step": 6090 + }, + { + "epoch": 0.020902866776777943, + "grad_norm": 1.0922538042068481, + "learning_rate": 1e-05, + "loss": 0.2522, + "step": 6100 + }, + { + "epoch": 0.020937133771493973, + "grad_norm": 1.1970179080963135, + "learning_rate": 1e-05, + "loss": 0.267, + "step": 6110 + }, + { + "epoch": 0.020971400766210003, + "grad_norm": 1.2625236511230469, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 6120 + }, + { + "epoch": 0.021005667760926033, + "grad_norm": 1.152846336364746, + "learning_rate": 1e-05, + "loss": 0.2429, + "step": 6130 + }, + { + "epoch": 0.02103993475564206, + "grad_norm": 1.1184160709381104, + "learning_rate": 1e-05, + "loss": 0.2566, + "step": 6140 + }, + { + "epoch": 0.02107420175035809, + "grad_norm": 1.1153484582901, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 6150 + }, + { + "epoch": 0.02110846874507412, + "grad_norm": 1.2822504043579102, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 6160 + }, + { + "epoch": 0.02114273573979015, + "grad_norm": 1.1332992315292358, + "learning_rate": 1e-05, + "loss": 0.2799, + "step": 6170 + }, + { + "epoch": 0.02117700273450618, + "grad_norm": 1.0284112691879272, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 6180 + }, + { + "epoch": 0.02121126972922221, + "grad_norm": 1.1097975969314575, + "learning_rate": 1e-05, + "loss": 0.2513, + "step": 6190 + }, + { + "epoch": 0.021245536723938238, + "grad_norm": 1.168990969657898, + "learning_rate": 1e-05, + "loss": 0.2843, + "step": 6200 + }, + { + "epoch": 0.021279803718654268, + "grad_norm": 0.9956926107406616, + "learning_rate": 1e-05, + "loss": 0.247, + "step": 6210 + }, + { + "epoch": 0.021314070713370294, + "grad_norm": 1.2191492319107056, + "learning_rate": 1e-05, + "loss": 0.2608, + "step": 6220 + }, + { + "epoch": 0.021348337708086324, + "grad_norm": 1.0872688293457031, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 6230 + }, + { + "epoch": 0.021382604702802354, + "grad_norm": 1.0746614933013916, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 6240 + }, + { + "epoch": 0.021416871697518384, + "grad_norm": 1.1560328006744385, + "learning_rate": 1e-05, + "loss": 0.2639, + "step": 6250 + }, + { + "epoch": 0.021451138692234414, + "grad_norm": 1.1529641151428223, + "learning_rate": 1e-05, + "loss": 0.2585, + "step": 6260 + }, + { + "epoch": 0.021485405686950444, + "grad_norm": 1.0708386898040771, + "learning_rate": 1e-05, + "loss": 0.2669, + "step": 6270 + }, + { + "epoch": 0.021519672681666473, + "grad_norm": 1.208079218864441, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 6280 + }, + { + "epoch": 0.021553939676382503, + "grad_norm": 1.1871508359909058, + "learning_rate": 1e-05, + "loss": 0.2655, + "step": 6290 + }, + { + "epoch": 0.02158820667109853, + "grad_norm": 1.0997953414916992, + "learning_rate": 1e-05, + "loss": 0.2578, + "step": 6300 + }, + { + "epoch": 0.02162247366581456, + "grad_norm": 1.2404417991638184, + "learning_rate": 1e-05, + "loss": 0.2726, + "step": 6310 + }, + { + "epoch": 0.02165674066053059, + "grad_norm": 1.1724058389663696, + "learning_rate": 1e-05, + "loss": 0.2611, + "step": 6320 + }, + { + "epoch": 0.02169100765524662, + "grad_norm": 1.124932885169983, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 6330 + }, + { + "epoch": 0.02172527464996265, + "grad_norm": 1.129584550857544, + "learning_rate": 1e-05, + "loss": 0.2651, + "step": 6340 + }, + { + "epoch": 0.02175954164467868, + "grad_norm": 1.1869479417800903, + "learning_rate": 1e-05, + "loss": 0.2451, + "step": 6350 + }, + { + "epoch": 0.02179380863939471, + "grad_norm": 1.1753504276275635, + "learning_rate": 1e-05, + "loss": 0.2509, + "step": 6360 + }, + { + "epoch": 0.02182807563411074, + "grad_norm": 1.1704761981964111, + "learning_rate": 1e-05, + "loss": 0.2614, + "step": 6370 + }, + { + "epoch": 0.02186234262882677, + "grad_norm": 1.347970724105835, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 6380 + }, + { + "epoch": 0.021896609623542795, + "grad_norm": 1.0677597522735596, + "learning_rate": 1e-05, + "loss": 0.2539, + "step": 6390 + }, + { + "epoch": 0.021930876618258825, + "grad_norm": 1.1567541360855103, + "learning_rate": 1e-05, + "loss": 0.2621, + "step": 6400 + }, + { + "epoch": 0.021965143612974854, + "grad_norm": 1.1231553554534912, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 6410 + }, + { + "epoch": 0.021999410607690884, + "grad_norm": 1.0485198497772217, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 6420 + }, + { + "epoch": 0.022033677602406914, + "grad_norm": 1.12228262424469, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 6430 + }, + { + "epoch": 0.022067944597122944, + "grad_norm": 1.2610136270523071, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 6440 + }, + { + "epoch": 0.022102211591838974, + "grad_norm": 0.9546436071395874, + "learning_rate": 1e-05, + "loss": 0.226, + "step": 6450 + }, + { + "epoch": 0.022136478586555004, + "grad_norm": 1.3363466262817383, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 6460 + }, + { + "epoch": 0.02217074558127103, + "grad_norm": 1.1454704999923706, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 6470 + }, + { + "epoch": 0.02220501257598706, + "grad_norm": 1.1578549146652222, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 6480 + }, + { + "epoch": 0.02223927957070309, + "grad_norm": 1.096081018447876, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 6490 + }, + { + "epoch": 0.02227354656541912, + "grad_norm": 1.2388731241226196, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 6500 + }, + { + "epoch": 0.02227354656541912, + "eval_cer": 12.929970012347859, + "eval_loss": 0.24839338660240173, + "eval_normalized_cer": 9.242605915267786, + "eval_runtime": 227.6401, + "eval_samples_per_second": 2.249, + "eval_steps_per_second": 0.035, + "step": 6500 + }, + { + "epoch": 0.02230781356013515, + "grad_norm": 1.0306715965270996, + "learning_rate": 1e-05, + "loss": 0.2393, + "step": 6510 + }, + { + "epoch": 0.02234208055485118, + "grad_norm": 1.1339504718780518, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 6520 + }, + { + "epoch": 0.02237634754956721, + "grad_norm": 0.912266731262207, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 6530 + }, + { + "epoch": 0.02241061454428324, + "grad_norm": 1.1917020082473755, + "learning_rate": 1e-05, + "loss": 0.2395, + "step": 6540 + }, + { + "epoch": 0.022444881538999265, + "grad_norm": 1.248515248298645, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 6550 + }, + { + "epoch": 0.022479148533715295, + "grad_norm": 1.180799961090088, + "learning_rate": 1e-05, + "loss": 0.2616, + "step": 6560 + }, + { + "epoch": 0.022513415528431325, + "grad_norm": 1.0700205564498901, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 6570 + }, + { + "epoch": 0.022547682523147355, + "grad_norm": 1.1814614534378052, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 6580 + }, + { + "epoch": 0.022581949517863385, + "grad_norm": 1.3973134756088257, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 6590 + }, + { + "epoch": 0.022616216512579414, + "grad_norm": 1.244265079498291, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 6600 + }, + { + "epoch": 0.022650483507295444, + "grad_norm": 1.1685833930969238, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 6610 + }, + { + "epoch": 0.022684750502011474, + "grad_norm": 1.1566667556762695, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 6620 + }, + { + "epoch": 0.0227190174967275, + "grad_norm": 1.0241929292678833, + "learning_rate": 1e-05, + "loss": 0.2412, + "step": 6630 + }, + { + "epoch": 0.02275328449144353, + "grad_norm": 1.0359474420547485, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 6640 + }, + { + "epoch": 0.02278755148615956, + "grad_norm": 1.040810227394104, + "learning_rate": 1e-05, + "loss": 0.2254, + "step": 6650 + }, + { + "epoch": 0.02282181848087559, + "grad_norm": 1.0343252420425415, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 6660 + }, + { + "epoch": 0.02285608547559162, + "grad_norm": 1.052739143371582, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 6670 + }, + { + "epoch": 0.02289035247030765, + "grad_norm": 1.0414966344833374, + "learning_rate": 1e-05, + "loss": 0.2082, + "step": 6680 + }, + { + "epoch": 0.02292461946502368, + "grad_norm": 1.2340532541275024, + "learning_rate": 1e-05, + "loss": 0.2241, + "step": 6690 + }, + { + "epoch": 0.02295888645973971, + "grad_norm": 0.9693310260772705, + "learning_rate": 1e-05, + "loss": 0.2322, + "step": 6700 + }, + { + "epoch": 0.022993153454455736, + "grad_norm": 1.103025197982788, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 6710 + }, + { + "epoch": 0.023027420449171766, + "grad_norm": 1.119689702987671, + "learning_rate": 1e-05, + "loss": 0.214, + "step": 6720 + }, + { + "epoch": 0.023061687443887795, + "grad_norm": 0.93172287940979, + "learning_rate": 1e-05, + "loss": 0.2094, + "step": 6730 + }, + { + "epoch": 0.023095954438603825, + "grad_norm": 1.0207446813583374, + "learning_rate": 1e-05, + "loss": 0.2238, + "step": 6740 + }, + { + "epoch": 0.023130221433319855, + "grad_norm": 1.200201392173767, + "learning_rate": 1e-05, + "loss": 0.218, + "step": 6750 + }, + { + "epoch": 0.023164488428035885, + "grad_norm": 1.1485291719436646, + "learning_rate": 1e-05, + "loss": 0.2314, + "step": 6760 + }, + { + "epoch": 0.023198755422751915, + "grad_norm": 1.2236285209655762, + "learning_rate": 1e-05, + "loss": 0.2326, + "step": 6770 + }, + { + "epoch": 0.023233022417467945, + "grad_norm": 1.1756523847579956, + "learning_rate": 1e-05, + "loss": 0.2122, + "step": 6780 + }, + { + "epoch": 0.02326728941218397, + "grad_norm": 1.0356839895248413, + "learning_rate": 1e-05, + "loss": 0.2078, + "step": 6790 + }, + { + "epoch": 0.0233015564069, + "grad_norm": 1.1896883249282837, + "learning_rate": 1e-05, + "loss": 0.2072, + "step": 6800 + }, + { + "epoch": 0.02333582340161603, + "grad_norm": 1.1080976724624634, + "learning_rate": 1e-05, + "loss": 0.2127, + "step": 6810 + }, + { + "epoch": 0.02337009039633206, + "grad_norm": 1.128263235092163, + "learning_rate": 1e-05, + "loss": 0.2282, + "step": 6820 + }, + { + "epoch": 0.02340435739104809, + "grad_norm": 1.0398188829421997, + "learning_rate": 1e-05, + "loss": 0.2095, + "step": 6830 + }, + { + "epoch": 0.02343862438576412, + "grad_norm": 1.1791975498199463, + "learning_rate": 1e-05, + "loss": 0.2216, + "step": 6840 + }, + { + "epoch": 0.02347289138048015, + "grad_norm": 1.1444710493087769, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 6850 + }, + { + "epoch": 0.02350715837519618, + "grad_norm": 1.136607050895691, + "learning_rate": 1e-05, + "loss": 0.2093, + "step": 6860 + }, + { + "epoch": 0.023541425369912206, + "grad_norm": 1.0915231704711914, + "learning_rate": 1e-05, + "loss": 0.2128, + "step": 6870 + }, + { + "epoch": 0.023575692364628236, + "grad_norm": 1.0416276454925537, + "learning_rate": 1e-05, + "loss": 0.2092, + "step": 6880 + }, + { + "epoch": 0.023609959359344266, + "grad_norm": 1.3693732023239136, + "learning_rate": 1e-05, + "loss": 0.2137, + "step": 6890 + }, + { + "epoch": 0.023644226354060296, + "grad_norm": 1.1747677326202393, + "learning_rate": 1e-05, + "loss": 0.2215, + "step": 6900 + }, + { + "epoch": 0.023678493348776326, + "grad_norm": 1.1593588590621948, + "learning_rate": 1e-05, + "loss": 0.2234, + "step": 6910 + }, + { + "epoch": 0.023712760343492355, + "grad_norm": 1.2322016954421997, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 6920 + }, + { + "epoch": 0.023747027338208385, + "grad_norm": 1.167648196220398, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 6930 + }, + { + "epoch": 0.023781294332924415, + "grad_norm": 1.0984666347503662, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 6940 + }, + { + "epoch": 0.023815561327640445, + "grad_norm": 1.1234291791915894, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 6950 + }, + { + "epoch": 0.02384982832235647, + "grad_norm": 1.2158063650131226, + "learning_rate": 1e-05, + "loss": 0.2567, + "step": 6960 + }, + { + "epoch": 0.0238840953170725, + "grad_norm": 1.0958101749420166, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 6970 + }, + { + "epoch": 0.02391836231178853, + "grad_norm": 1.1536844968795776, + "learning_rate": 1e-05, + "loss": 0.2712, + "step": 6980 + }, + { + "epoch": 0.02395262930650456, + "grad_norm": 1.2437007427215576, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 6990 + }, + { + "epoch": 0.02398689630122059, + "grad_norm": 1.0884592533111572, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 7000 + }, + { + "epoch": 0.02398689630122059, + "eval_cer": 13.079908273064033, + "eval_loss": 0.2514401376247406, + "eval_normalized_cer": 9.622302158273381, + "eval_runtime": 227.6705, + "eval_samples_per_second": 2.249, + "eval_steps_per_second": 0.035, + "step": 7000 + }, + { + "epoch": 0.02402116329593662, + "grad_norm": 1.2332980632781982, + "learning_rate": 1e-05, + "loss": 0.2543, + "step": 7010 + }, + { + "epoch": 0.02405543029065265, + "grad_norm": 1.1041260957717896, + "learning_rate": 1e-05, + "loss": 0.2663, + "step": 7020 + }, + { + "epoch": 0.02408969728536868, + "grad_norm": 1.1479183435440063, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 7030 + }, + { + "epoch": 0.024123964280084707, + "grad_norm": 1.103766918182373, + "learning_rate": 1e-05, + "loss": 0.2336, + "step": 7040 + }, + { + "epoch": 0.024158231274800736, + "grad_norm": 1.238996148109436, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 7050 + }, + { + "epoch": 0.024192498269516766, + "grad_norm": 1.2652095556259155, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 7060 + }, + { + "epoch": 0.024226765264232796, + "grad_norm": 1.180665373802185, + "learning_rate": 1e-05, + "loss": 0.2541, + "step": 7070 + }, + { + "epoch": 0.024261032258948826, + "grad_norm": 1.1601506471633911, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 7080 + }, + { + "epoch": 0.024295299253664856, + "grad_norm": 1.257034420967102, + "learning_rate": 1e-05, + "loss": 0.2446, + "step": 7090 + }, + { + "epoch": 0.024329566248380886, + "grad_norm": 1.0813285112380981, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 7100 + }, + { + "epoch": 0.024363833243096916, + "grad_norm": 1.1124157905578613, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 7110 + }, + { + "epoch": 0.024398100237812942, + "grad_norm": 1.0615211725234985, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 7120 + }, + { + "epoch": 0.024432367232528972, + "grad_norm": 1.185677409172058, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 7130 + }, + { + "epoch": 0.024466634227245, + "grad_norm": 1.1810061931610107, + "learning_rate": 1e-05, + "loss": 0.2603, + "step": 7140 + }, + { + "epoch": 0.02450090122196103, + "grad_norm": 1.155860424041748, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 7150 + }, + { + "epoch": 0.02453516821667706, + "grad_norm": 1.113008737564087, + "learning_rate": 1e-05, + "loss": 0.2529, + "step": 7160 + }, + { + "epoch": 0.02456943521139309, + "grad_norm": 1.1276872158050537, + "learning_rate": 1e-05, + "loss": 0.2265, + "step": 7170 + }, + { + "epoch": 0.02460370220610912, + "grad_norm": 1.149792194366455, + "learning_rate": 1e-05, + "loss": 0.2349, + "step": 7180 + }, + { + "epoch": 0.02463796920082515, + "grad_norm": 1.1619532108306885, + "learning_rate": 1e-05, + "loss": 0.2336, + "step": 7190 + }, + { + "epoch": 0.024672236195541177, + "grad_norm": 1.0760303735733032, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 7200 + }, + { + "epoch": 0.024706503190257207, + "grad_norm": 1.2807782888412476, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 7210 + }, + { + "epoch": 0.024740770184973237, + "grad_norm": 1.0910037755966187, + "learning_rate": 1e-05, + "loss": 0.2333, + "step": 7220 + }, + { + "epoch": 0.024775037179689267, + "grad_norm": 1.2938390970230103, + "learning_rate": 1e-05, + "loss": 0.2147, + "step": 7230 + }, + { + "epoch": 0.024809304174405297, + "grad_norm": 1.185542106628418, + "learning_rate": 1e-05, + "loss": 0.2232, + "step": 7240 + }, + { + "epoch": 0.024843571169121326, + "grad_norm": 1.0598995685577393, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 7250 + }, + { + "epoch": 0.024877838163837356, + "grad_norm": 1.1860477924346924, + "learning_rate": 1e-05, + "loss": 0.2179, + "step": 7260 + }, + { + "epoch": 0.024912105158553386, + "grad_norm": 1.1935844421386719, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 7270 + }, + { + "epoch": 0.024946372153269412, + "grad_norm": 1.0449039936065674, + "learning_rate": 1e-05, + "loss": 0.2307, + "step": 7280 + }, + { + "epoch": 0.024980639147985442, + "grad_norm": 1.0651369094848633, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 7290 + }, + { + "epoch": 0.025014906142701472, + "grad_norm": 1.0416852235794067, + "learning_rate": 1e-05, + "loss": 0.2208, + "step": 7300 + }, + { + "epoch": 0.025049173137417502, + "grad_norm": 1.0064860582351685, + "learning_rate": 1e-05, + "loss": 0.2227, + "step": 7310 + }, + { + "epoch": 0.025083440132133532, + "grad_norm": 1.0357342958450317, + "learning_rate": 1e-05, + "loss": 0.22, + "step": 7320 + }, + { + "epoch": 0.02511770712684956, + "grad_norm": 1.019918441772461, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 7330 + }, + { + "epoch": 0.02515197412156559, + "grad_norm": 1.0327798128128052, + "learning_rate": 1e-05, + "loss": 0.2118, + "step": 7340 + }, + { + "epoch": 0.02518624111628162, + "grad_norm": 0.9973874092102051, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 7350 + }, + { + "epoch": 0.025220508110997648, + "grad_norm": 1.093544840812683, + "learning_rate": 1e-05, + "loss": 0.2214, + "step": 7360 + }, + { + "epoch": 0.025254775105713678, + "grad_norm": 1.118829369544983, + "learning_rate": 1e-05, + "loss": 0.237, + "step": 7370 + }, + { + "epoch": 0.025289042100429707, + "grad_norm": 1.2009224891662598, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 7380 + }, + { + "epoch": 0.025323309095145737, + "grad_norm": 1.1427584886550903, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 7390 + }, + { + "epoch": 0.025357576089861767, + "grad_norm": 0.9685842394828796, + "learning_rate": 1e-05, + "loss": 0.2231, + "step": 7400 + }, + { + "epoch": 0.025391843084577797, + "grad_norm": 1.165501356124878, + "learning_rate": 1e-05, + "loss": 0.2139, + "step": 7410 + }, + { + "epoch": 0.025426110079293827, + "grad_norm": 1.4023411273956299, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 7420 + }, + { + "epoch": 0.025460377074009857, + "grad_norm": 1.218546748161316, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 7430 + }, + { + "epoch": 0.025494644068725883, + "grad_norm": 1.4930671453475952, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 7440 + }, + { + "epoch": 0.025528911063441913, + "grad_norm": 1.145317554473877, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 7450 + }, + { + "epoch": 0.025563178058157943, + "grad_norm": 1.2366299629211426, + "learning_rate": 1e-05, + "loss": 0.2606, + "step": 7460 + }, + { + "epoch": 0.025597445052873972, + "grad_norm": 1.0542744398117065, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 7470 + }, + { + "epoch": 0.025631712047590002, + "grad_norm": 1.2272337675094604, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 7480 + }, + { + "epoch": 0.025665979042306032, + "grad_norm": 1.169912576675415, + "learning_rate": 1e-05, + "loss": 0.2581, + "step": 7490 + }, + { + "epoch": 0.025700246037022062, + "grad_norm": 1.1997913122177124, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 7500 + }, + { + "epoch": 0.025700246037022062, + "eval_cer": 12.859410830834362, + "eval_loss": 0.2470153123140335, + "eval_normalized_cer": 9.162669864108713, + "eval_runtime": 227.7782, + "eval_samples_per_second": 2.248, + "eval_steps_per_second": 0.035, + "step": 7500 + }, + { + "epoch": 0.025734513031738092, + "grad_norm": 1.0920944213867188, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 7510 + }, + { + "epoch": 0.025768780026454118, + "grad_norm": 1.349660038948059, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 7520 + }, + { + "epoch": 0.025803047021170148, + "grad_norm": 1.0097490549087524, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 7530 + }, + { + "epoch": 0.025837314015886178, + "grad_norm": 1.118241548538208, + "learning_rate": 1e-05, + "loss": 0.2603, + "step": 7540 + }, + { + "epoch": 0.025871581010602208, + "grad_norm": 1.078802466392517, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 7550 + }, + { + "epoch": 0.025905848005318238, + "grad_norm": 1.0794482231140137, + "learning_rate": 1e-05, + "loss": 0.2521, + "step": 7560 + }, + { + "epoch": 0.025940115000034267, + "grad_norm": 1.130106806755066, + "learning_rate": 1e-05, + "loss": 0.2574, + "step": 7570 + }, + { + "epoch": 0.025974381994750297, + "grad_norm": 1.112724781036377, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 7580 + }, + { + "epoch": 0.026008648989466327, + "grad_norm": 1.2646088600158691, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 7590 + }, + { + "epoch": 0.026042915984182357, + "grad_norm": 1.1961979866027832, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 7600 + }, + { + "epoch": 0.026077182978898383, + "grad_norm": 1.2568695545196533, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 7610 + }, + { + "epoch": 0.026111449973614413, + "grad_norm": 1.0233054161071777, + "learning_rate": 1e-05, + "loss": 0.2429, + "step": 7620 + }, + { + "epoch": 0.026145716968330443, + "grad_norm": 1.4355731010437012, + "learning_rate": 1e-05, + "loss": 0.2623, + "step": 7630 + }, + { + "epoch": 0.026179983963046473, + "grad_norm": 0.9781149625778198, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 7640 + }, + { + "epoch": 0.026214250957762503, + "grad_norm": 1.085255742073059, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 7650 + }, + { + "epoch": 0.026248517952478533, + "grad_norm": 1.0647081136703491, + "learning_rate": 1e-05, + "loss": 0.2596, + "step": 7660 + }, + { + "epoch": 0.026282784947194562, + "grad_norm": 1.3411939144134521, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 7670 + }, + { + "epoch": 0.026317051941910592, + "grad_norm": 1.0778676271438599, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 7680 + }, + { + "epoch": 0.02635131893662662, + "grad_norm": 1.1606541872024536, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 7690 + }, + { + "epoch": 0.02638558593134265, + "grad_norm": 1.0706511735916138, + "learning_rate": 1e-05, + "loss": 0.2324, + "step": 7700 + }, + { + "epoch": 0.02641985292605868, + "grad_norm": 1.2074836492538452, + "learning_rate": 1e-05, + "loss": 0.2487, + "step": 7710 + }, + { + "epoch": 0.026454119920774708, + "grad_norm": 1.0147804021835327, + "learning_rate": 1e-05, + "loss": 0.2202, + "step": 7720 + }, + { + "epoch": 0.026488386915490738, + "grad_norm": 1.1806961297988892, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 7730 + }, + { + "epoch": 0.026522653910206768, + "grad_norm": 1.1552751064300537, + "learning_rate": 1e-05, + "loss": 0.2244, + "step": 7740 + }, + { + "epoch": 0.026556920904922798, + "grad_norm": 1.115871548652649, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 7750 + }, + { + "epoch": 0.026591187899638827, + "grad_norm": 1.0924640893936157, + "learning_rate": 1e-05, + "loss": 0.2237, + "step": 7760 + }, + { + "epoch": 0.026625454894354854, + "grad_norm": 1.021644115447998, + "learning_rate": 1e-05, + "loss": 0.2257, + "step": 7770 + }, + { + "epoch": 0.026659721889070884, + "grad_norm": 1.1757131814956665, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 7780 + }, + { + "epoch": 0.026693988883786914, + "grad_norm": 1.1914074420928955, + "learning_rate": 1e-05, + "loss": 0.2266, + "step": 7790 + }, + { + "epoch": 0.026728255878502943, + "grad_norm": 1.0416505336761475, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 7800 + }, + { + "epoch": 0.026762522873218973, + "grad_norm": 1.0241059064865112, + "learning_rate": 1e-05, + "loss": 0.2342, + "step": 7810 + }, + { + "epoch": 0.026796789867935003, + "grad_norm": 1.133334994316101, + "learning_rate": 1e-05, + "loss": 0.2303, + "step": 7820 + }, + { + "epoch": 0.026831056862651033, + "grad_norm": 1.1711792945861816, + "learning_rate": 1e-05, + "loss": 0.2333, + "step": 7830 + }, + { + "epoch": 0.026865323857367063, + "grad_norm": 1.1120338439941406, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 7840 + }, + { + "epoch": 0.02689959085208309, + "grad_norm": 1.1995311975479126, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 7850 + }, + { + "epoch": 0.02693385784679912, + "grad_norm": 1.1725718975067139, + "learning_rate": 1e-05, + "loss": 0.2361, + "step": 7860 + }, + { + "epoch": 0.02696812484151515, + "grad_norm": 0.9564438462257385, + "learning_rate": 1e-05, + "loss": 0.2266, + "step": 7870 + }, + { + "epoch": 0.02700239183623118, + "grad_norm": 1.140692114830017, + "learning_rate": 1e-05, + "loss": 0.2319, + "step": 7880 + }, + { + "epoch": 0.02703665883094721, + "grad_norm": 1.0812654495239258, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 7890 + }, + { + "epoch": 0.02707092582566324, + "grad_norm": 1.179500937461853, + "learning_rate": 1e-05, + "loss": 0.2191, + "step": 7900 + }, + { + "epoch": 0.027105192820379268, + "grad_norm": 1.1073647737503052, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 7910 + }, + { + "epoch": 0.027139459815095298, + "grad_norm": 1.093070387840271, + "learning_rate": 1e-05, + "loss": 0.2256, + "step": 7920 + }, + { + "epoch": 0.027173726809811324, + "grad_norm": 1.2253212928771973, + "learning_rate": 1e-05, + "loss": 0.2413, + "step": 7930 + }, + { + "epoch": 0.027207993804527354, + "grad_norm": 1.1531736850738525, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 7940 + }, + { + "epoch": 0.027242260799243384, + "grad_norm": 1.0366076231002808, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 7950 + }, + { + "epoch": 0.027276527793959414, + "grad_norm": 1.1657369136810303, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 7960 + }, + { + "epoch": 0.027310794788675444, + "grad_norm": 1.3050105571746826, + "learning_rate": 1e-05, + "loss": 0.2704, + "step": 7970 + }, + { + "epoch": 0.027345061783391474, + "grad_norm": 1.1378298997879028, + "learning_rate": 1e-05, + "loss": 0.2481, + "step": 7980 + }, + { + "epoch": 0.027379328778107503, + "grad_norm": 1.1434043645858765, + "learning_rate": 1e-05, + "loss": 0.2671, + "step": 7990 + }, + { + "epoch": 0.027413595772823533, + "grad_norm": 1.0899518728256226, + "learning_rate": 1e-05, + "loss": 0.2573, + "step": 8000 + }, + { + "epoch": 0.027413595772823533, + "eval_cer": 12.903510319280295, + "eval_loss": 0.2475583553314209, + "eval_normalized_cer": 9.362509992006395, + "eval_runtime": 228.4278, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.035, + "step": 8000 + }, + { + "epoch": 0.02744786276753956, + "grad_norm": 0.970212459564209, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 8010 + }, + { + "epoch": 0.02748212976225559, + "grad_norm": 1.0460257530212402, + "learning_rate": 1e-05, + "loss": 0.2606, + "step": 8020 + }, + { + "epoch": 0.02751639675697162, + "grad_norm": 1.116742491722107, + "learning_rate": 1e-05, + "loss": 0.2571, + "step": 8030 + }, + { + "epoch": 0.02755066375168765, + "grad_norm": 1.2562140226364136, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 8040 + }, + { + "epoch": 0.02758493074640368, + "grad_norm": 1.2499713897705078, + "learning_rate": 1e-05, + "loss": 0.2683, + "step": 8050 + }, + { + "epoch": 0.02761919774111971, + "grad_norm": 1.151715874671936, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 8060 + }, + { + "epoch": 0.02765346473583574, + "grad_norm": 1.2527892589569092, + "learning_rate": 1e-05, + "loss": 0.261, + "step": 8070 + }, + { + "epoch": 0.02768773173055177, + "grad_norm": 1.1776025295257568, + "learning_rate": 1e-05, + "loss": 0.2616, + "step": 8080 + }, + { + "epoch": 0.027721998725267795, + "grad_norm": 1.1632285118103027, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 8090 + }, + { + "epoch": 0.027756265719983825, + "grad_norm": 1.3266422748565674, + "learning_rate": 1e-05, + "loss": 0.2667, + "step": 8100 + }, + { + "epoch": 0.027790532714699855, + "grad_norm": 1.240424633026123, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 8110 + }, + { + "epoch": 0.027824799709415884, + "grad_norm": 1.1874525547027588, + "learning_rate": 1e-05, + "loss": 0.2505, + "step": 8120 + }, + { + "epoch": 0.027859066704131914, + "grad_norm": 1.0850279331207275, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 8130 + }, + { + "epoch": 0.027893333698847944, + "grad_norm": 1.203342318534851, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 8140 + }, + { + "epoch": 0.027927600693563974, + "grad_norm": 0.9685319066047668, + "learning_rate": 1e-05, + "loss": 0.2614, + "step": 8150 + }, + { + "epoch": 0.027961867688280004, + "grad_norm": 1.020749807357788, + "learning_rate": 1e-05, + "loss": 0.2763, + "step": 8160 + }, + { + "epoch": 0.02799613468299603, + "grad_norm": 1.1530399322509766, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 8170 + }, + { + "epoch": 0.02803040167771206, + "grad_norm": 1.0800687074661255, + "learning_rate": 1e-05, + "loss": 0.2628, + "step": 8180 + }, + { + "epoch": 0.02806466867242809, + "grad_norm": 1.1825618743896484, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 8190 + }, + { + "epoch": 0.02809893566714412, + "grad_norm": 1.176870346069336, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 8200 + }, + { + "epoch": 0.02813320266186015, + "grad_norm": 1.19720458984375, + "learning_rate": 1e-05, + "loss": 0.2801, + "step": 8210 + }, + { + "epoch": 0.02816746965657618, + "grad_norm": 1.0634618997573853, + "learning_rate": 1e-05, + "loss": 0.2607, + "step": 8220 + }, + { + "epoch": 0.02820173665129221, + "grad_norm": 1.1780894994735718, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 8230 + }, + { + "epoch": 0.02823600364600824, + "grad_norm": 1.18949294090271, + "learning_rate": 1e-05, + "loss": 0.2432, + "step": 8240 + }, + { + "epoch": 0.02827027064072427, + "grad_norm": 1.3350197076797485, + "learning_rate": 1e-05, + "loss": 0.2644, + "step": 8250 + }, + { + "epoch": 0.028304537635440295, + "grad_norm": 1.1507694721221924, + "learning_rate": 1e-05, + "loss": 0.254, + "step": 8260 + }, + { + "epoch": 0.028338804630156325, + "grad_norm": 1.0806615352630615, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 8270 + }, + { + "epoch": 0.028373071624872355, + "grad_norm": 1.1201471090316772, + "learning_rate": 1e-05, + "loss": 0.2553, + "step": 8280 + }, + { + "epoch": 0.028407338619588385, + "grad_norm": 1.0681666135787964, + "learning_rate": 1e-05, + "loss": 0.258, + "step": 8290 + }, + { + "epoch": 0.028441605614304415, + "grad_norm": 1.0958445072174072, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 8300 + }, + { + "epoch": 0.028475872609020444, + "grad_norm": 1.165635585784912, + "learning_rate": 1e-05, + "loss": 0.2642, + "step": 8310 + }, + { + "epoch": 0.028510139603736474, + "grad_norm": 0.9674690961837769, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 8320 + }, + { + "epoch": 0.028544406598452504, + "grad_norm": 1.239996314048767, + "learning_rate": 1e-05, + "loss": 0.2706, + "step": 8330 + }, + { + "epoch": 0.02857867359316853, + "grad_norm": 1.0063962936401367, + "learning_rate": 1e-05, + "loss": 0.2448, + "step": 8340 + }, + { + "epoch": 0.02861294058788456, + "grad_norm": 1.0466179847717285, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 8350 + }, + { + "epoch": 0.02864720758260059, + "grad_norm": Infinity, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 8360 + }, + { + "epoch": 0.02868147457731662, + "grad_norm": 1.1461595296859741, + "learning_rate": 1e-05, + "loss": 0.2515, + "step": 8370 + }, + { + "epoch": 0.02871574157203265, + "grad_norm": 1.2697845697402954, + "learning_rate": 1e-05, + "loss": 0.2641, + "step": 8380 + }, + { + "epoch": 0.02875000856674868, + "grad_norm": 1.2665945291519165, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 8390 + }, + { + "epoch": 0.02878427556146471, + "grad_norm": 1.1350281238555908, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 8400 + }, + { + "epoch": 0.02881854255618074, + "grad_norm": 1.0341808795928955, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 8410 + }, + { + "epoch": 0.028852809550896766, + "grad_norm": 1.1108484268188477, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 8420 + }, + { + "epoch": 0.028887076545612796, + "grad_norm": 1.059414029121399, + "learning_rate": 1e-05, + "loss": 0.2695, + "step": 8430 + }, + { + "epoch": 0.028921343540328825, + "grad_norm": 1.0888679027557373, + "learning_rate": 1e-05, + "loss": 0.2683, + "step": 8440 + }, + { + "epoch": 0.028955610535044855, + "grad_norm": 1.1649068593978882, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 8450 + }, + { + "epoch": 0.028989877529760885, + "grad_norm": 1.218563199043274, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 8460 + }, + { + "epoch": 0.029024144524476915, + "grad_norm": 1.3558833599090576, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 8470 + }, + { + "epoch": 0.029058411519192945, + "grad_norm": 1.2579597234725952, + "learning_rate": 1e-05, + "loss": 0.2516, + "step": 8480 + }, + { + "epoch": 0.029092678513908975, + "grad_norm": 1.185253381729126, + "learning_rate": 1e-05, + "loss": 0.2475, + "step": 8490 + }, + { + "epoch": 0.029126945508625, + "grad_norm": 1.1937752962112427, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 8500 + }, + { + "epoch": 0.029126945508625, + "eval_cer": 12.89469042159111, + "eval_loss": 0.2503049969673157, + "eval_normalized_cer": 8.952837729816148, + "eval_runtime": 229.0216, + "eval_samples_per_second": 2.236, + "eval_steps_per_second": 0.035, + "step": 8500 + }, + { + "epoch": 0.02916121250334103, + "grad_norm": 1.1005933284759521, + "learning_rate": 1e-05, + "loss": 0.247, + "step": 8510 + }, + { + "epoch": 0.02919547949805706, + "grad_norm": 1.0437865257263184, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 8520 + }, + { + "epoch": 0.02922974649277309, + "grad_norm": 1.0478579998016357, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 8530 + }, + { + "epoch": 0.02926401348748912, + "grad_norm": 1.0615030527114868, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 8540 + }, + { + "epoch": 0.02929828048220515, + "grad_norm": 1.105209469795227, + "learning_rate": 1e-05, + "loss": 0.2572, + "step": 8550 + }, + { + "epoch": 0.02933254747692118, + "grad_norm": 1.139857530593872, + "learning_rate": 1e-05, + "loss": 0.237, + "step": 8560 + }, + { + "epoch": 0.02936681447163721, + "grad_norm": 1.0326822996139526, + "learning_rate": 1e-05, + "loss": 0.2587, + "step": 8570 + }, + { + "epoch": 0.029401081466353236, + "grad_norm": 1.4446253776550293, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 8580 + }, + { + "epoch": 0.029435348461069266, + "grad_norm": 1.070698857307434, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 8590 + }, + { + "epoch": 0.029469615455785296, + "grad_norm": 1.119545817375183, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 8600 + }, + { + "epoch": 0.029503882450501326, + "grad_norm": 1.1146609783172607, + "learning_rate": 1e-05, + "loss": 0.2429, + "step": 8610 + }, + { + "epoch": 0.029538149445217356, + "grad_norm": 1.107639193534851, + "learning_rate": 1e-05, + "loss": 0.2412, + "step": 8620 + }, + { + "epoch": 0.029572416439933386, + "grad_norm": 1.0722100734710693, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 8630 + }, + { + "epoch": 0.029606683434649415, + "grad_norm": 1.3313097953796387, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 8640 + }, + { + "epoch": 0.029640950429365445, + "grad_norm": 1.13486647605896, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 8650 + }, + { + "epoch": 0.02967521742408147, + "grad_norm": 1.1610273122787476, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 8660 + }, + { + "epoch": 0.0297094844187975, + "grad_norm": 1.3555855751037598, + "learning_rate": 1e-05, + "loss": 0.2521, + "step": 8670 + }, + { + "epoch": 0.02974375141351353, + "grad_norm": 1.2834869623184204, + "learning_rate": 1e-05, + "loss": 0.2638, + "step": 8680 + }, + { + "epoch": 0.02977801840822956, + "grad_norm": 1.1000789403915405, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 8690 + }, + { + "epoch": 0.02981228540294559, + "grad_norm": 0.9901896119117737, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 8700 + }, + { + "epoch": 0.02984655239766162, + "grad_norm": 1.0514518022537231, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 8710 + }, + { + "epoch": 0.02988081939237765, + "grad_norm": 1.1127166748046875, + "learning_rate": 1e-05, + "loss": 0.2259, + "step": 8720 + }, + { + "epoch": 0.02991508638709368, + "grad_norm": 1.0674943923950195, + "learning_rate": 1e-05, + "loss": 0.2309, + "step": 8730 + }, + { + "epoch": 0.029949353381809707, + "grad_norm": 1.0397884845733643, + "learning_rate": 1e-05, + "loss": 0.2411, + "step": 8740 + }, + { + "epoch": 0.029983620376525737, + "grad_norm": 1.2052630186080933, + "learning_rate": 1e-05, + "loss": 0.2294, + "step": 8750 + }, + { + "epoch": 0.030017887371241767, + "grad_norm": 1.1350561380386353, + "learning_rate": 1e-05, + "loss": 0.2264, + "step": 8760 + }, + { + "epoch": 0.030052154365957796, + "grad_norm": 1.1187571287155151, + "learning_rate": 1e-05, + "loss": 0.2325, + "step": 8770 + }, + { + "epoch": 0.030086421360673826, + "grad_norm": 1.0860145092010498, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 8780 + }, + { + "epoch": 0.030120688355389856, + "grad_norm": 1.1102906465530396, + "learning_rate": 1e-05, + "loss": 0.2271, + "step": 8790 + }, + { + "epoch": 0.030154955350105886, + "grad_norm": 1.0239520072937012, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 8800 + }, + { + "epoch": 0.030189222344821916, + "grad_norm": 1.0980205535888672, + "learning_rate": 1e-05, + "loss": 0.2314, + "step": 8810 + }, + { + "epoch": 0.030223489339537942, + "grad_norm": 1.1887843608856201, + "learning_rate": 1e-05, + "loss": 0.24, + "step": 8820 + }, + { + "epoch": 0.030257756334253972, + "grad_norm": 1.2101106643676758, + "learning_rate": 1e-05, + "loss": 0.2353, + "step": 8830 + }, + { + "epoch": 0.030292023328970002, + "grad_norm": 1.1793345212936401, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 8840 + }, + { + "epoch": 0.03032629032368603, + "grad_norm": 1.1983866691589355, + "learning_rate": 1e-05, + "loss": 0.2441, + "step": 8850 + }, + { + "epoch": 0.03036055731840206, + "grad_norm": 0.9888906478881836, + "learning_rate": 1e-05, + "loss": 0.2271, + "step": 8860 + }, + { + "epoch": 0.03039482431311809, + "grad_norm": 1.217824101448059, + "learning_rate": 1e-05, + "loss": 0.2298, + "step": 8870 + }, + { + "epoch": 0.03042909130783412, + "grad_norm": 0.9851268529891968, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 8880 + }, + { + "epoch": 0.03046335830255015, + "grad_norm": 1.0329748392105103, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 8890 + }, + { + "epoch": 0.03049762529726618, + "grad_norm": 1.067325234413147, + "learning_rate": 1e-05, + "loss": 0.23, + "step": 8900 + }, + { + "epoch": 0.030531892291982207, + "grad_norm": 1.2636964321136475, + "learning_rate": 1e-05, + "loss": 0.2303, + "step": 8910 + }, + { + "epoch": 0.030566159286698237, + "grad_norm": 1.1565788984298706, + "learning_rate": 1e-05, + "loss": 0.2494, + "step": 8920 + }, + { + "epoch": 0.030600426281414267, + "grad_norm": 1.2197197675704956, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 8930 + }, + { + "epoch": 0.030634693276130297, + "grad_norm": 1.1062088012695312, + "learning_rate": 1e-05, + "loss": 0.2157, + "step": 8940 + }, + { + "epoch": 0.030668960270846327, + "grad_norm": 1.107677936553955, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 8950 + }, + { + "epoch": 0.030703227265562356, + "grad_norm": 1.1791733503341675, + "learning_rate": 1e-05, + "loss": 0.2337, + "step": 8960 + }, + { + "epoch": 0.030737494260278386, + "grad_norm": 1.1337239742279053, + "learning_rate": 1e-05, + "loss": 0.2127, + "step": 8970 + }, + { + "epoch": 0.030771761254994416, + "grad_norm": 1.0418322086334229, + "learning_rate": 1e-05, + "loss": 0.222, + "step": 8980 + }, + { + "epoch": 0.030806028249710442, + "grad_norm": 1.1591708660125732, + "learning_rate": 1e-05, + "loss": 0.2388, + "step": 8990 + }, + { + "epoch": 0.030840295244426472, + "grad_norm": 1.0103886127471924, + "learning_rate": 1e-05, + "loss": 0.2264, + "step": 9000 + }, + { + "epoch": 0.030840295244426472, + "eval_cer": 13.273946022226143, + "eval_loss": 0.2514854073524475, + "eval_normalized_cer": 9.492406075139888, + "eval_runtime": 227.5807, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 0.035, + "step": 9000 + }, + { + "epoch": 0.030874562239142502, + "grad_norm": 1.15034019947052, + "learning_rate": 1e-05, + "loss": 0.2215, + "step": 9010 + }, + { + "epoch": 0.030908829233858532, + "grad_norm": 1.183698296546936, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 9020 + }, + { + "epoch": 0.030943096228574562, + "grad_norm": 1.1930736303329468, + "learning_rate": 1e-05, + "loss": 0.25, + "step": 9030 + }, + { + "epoch": 0.03097736322329059, + "grad_norm": 1.0650999546051025, + "learning_rate": 1e-05, + "loss": 0.224, + "step": 9040 + }, + { + "epoch": 0.03101163021800662, + "grad_norm": 1.0613574981689453, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 9050 + }, + { + "epoch": 0.03104589721272265, + "grad_norm": 1.3004292249679565, + "learning_rate": 1e-05, + "loss": 0.2286, + "step": 9060 + }, + { + "epoch": 0.031080164207438678, + "grad_norm": 1.144573450088501, + "learning_rate": 1e-05, + "loss": 0.2375, + "step": 9070 + }, + { + "epoch": 0.031114431202154708, + "grad_norm": 1.3552353382110596, + "learning_rate": 1e-05, + "loss": 0.216, + "step": 9080 + }, + { + "epoch": 0.031148698196870737, + "grad_norm": 1.139901041984558, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 9090 + }, + { + "epoch": 0.031182965191586767, + "grad_norm": 1.179685354232788, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 9100 + }, + { + "epoch": 0.031217232186302797, + "grad_norm": 1.1494848728179932, + "learning_rate": 1e-05, + "loss": 0.2275, + "step": 9110 + }, + { + "epoch": 0.03125149918101883, + "grad_norm": 1.0726871490478516, + "learning_rate": 1e-05, + "loss": 0.211, + "step": 9120 + }, + { + "epoch": 0.03128576617573486, + "grad_norm": 1.254655361175537, + "learning_rate": 1e-05, + "loss": 0.2332, + "step": 9130 + }, + { + "epoch": 0.03132003317045089, + "grad_norm": 1.1774569749832153, + "learning_rate": 1e-05, + "loss": 0.2441, + "step": 9140 + }, + { + "epoch": 0.031354300165166916, + "grad_norm": 1.0810356140136719, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 9150 + }, + { + "epoch": 0.031388567159882946, + "grad_norm": 1.0976495742797852, + "learning_rate": 1e-05, + "loss": 0.2347, + "step": 9160 + }, + { + "epoch": 0.031422834154598976, + "grad_norm": 1.2417839765548706, + "learning_rate": 1e-05, + "loss": 0.2261, + "step": 9170 + }, + { + "epoch": 0.031457101149315006, + "grad_norm": 1.0022953748703003, + "learning_rate": 1e-05, + "loss": 0.2277, + "step": 9180 + }, + { + "epoch": 0.03149136814403103, + "grad_norm": 1.1461567878723145, + "learning_rate": 1e-05, + "loss": 0.218, + "step": 9190 + }, + { + "epoch": 0.03152563513874706, + "grad_norm": 1.1877334117889404, + "learning_rate": 1e-05, + "loss": 0.2195, + "step": 9200 + }, + { + "epoch": 0.03155990213346309, + "grad_norm": 1.1513786315917969, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 9210 + }, + { + "epoch": 0.03159416912817912, + "grad_norm": 1.057938814163208, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 9220 + }, + { + "epoch": 0.03162843612289515, + "grad_norm": 1.0752719640731812, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 9230 + }, + { + "epoch": 0.03166270311761118, + "grad_norm": 1.1784312725067139, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 9240 + }, + { + "epoch": 0.03169697011232721, + "grad_norm": 1.0176231861114502, + "learning_rate": 1e-05, + "loss": 0.2552, + "step": 9250 + }, + { + "epoch": 0.03173123710704324, + "grad_norm": 1.0849392414093018, + "learning_rate": 1e-05, + "loss": 0.2483, + "step": 9260 + }, + { + "epoch": 0.03176550410175927, + "grad_norm": 1.0042351484298706, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 9270 + }, + { + "epoch": 0.0317997710964753, + "grad_norm": 1.0480408668518066, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 9280 + }, + { + "epoch": 0.03183403809119133, + "grad_norm": 1.0602933168411255, + "learning_rate": 1e-05, + "loss": 0.2657, + "step": 9290 + }, + { + "epoch": 0.03186830508590736, + "grad_norm": 1.1704037189483643, + "learning_rate": 1e-05, + "loss": 0.2634, + "step": 9300 + }, + { + "epoch": 0.03190257208062339, + "grad_norm": 1.2454304695129395, + "learning_rate": 1e-05, + "loss": 0.2648, + "step": 9310 + }, + { + "epoch": 0.03193683907533942, + "grad_norm": 1.0540211200714111, + "learning_rate": 1e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.03197110607005545, + "grad_norm": 1.1440715789794922, + "learning_rate": 1e-05, + "loss": 0.2609, + "step": 9330 + }, + { + "epoch": 0.03200537306477148, + "grad_norm": 1.0083932876586914, + "learning_rate": 1e-05, + "loss": 0.2422, + "step": 9340 + }, + { + "epoch": 0.0320396400594875, + "grad_norm": 1.0180490016937256, + "learning_rate": 1e-05, + "loss": 0.2402, + "step": 9350 + }, + { + "epoch": 0.03207390705420353, + "grad_norm": 1.1158274412155151, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 9360 + }, + { + "epoch": 0.03210817404891956, + "grad_norm": 1.2014826536178589, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 9370 + }, + { + "epoch": 0.03214244104363559, + "grad_norm": 1.1604617834091187, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 9380 + }, + { + "epoch": 0.03217670803835162, + "grad_norm": 1.1088517904281616, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 9390 + }, + { + "epoch": 0.03221097503306765, + "grad_norm": 1.1905596256256104, + "learning_rate": 1e-05, + "loss": 0.2335, + "step": 9400 + }, + { + "epoch": 0.03224524202778368, + "grad_norm": 1.2237385511398315, + "learning_rate": 1e-05, + "loss": 0.2522, + "step": 9410 + }, + { + "epoch": 0.03227950902249971, + "grad_norm": 1.0582191944122314, + "learning_rate": 1e-05, + "loss": 0.2467, + "step": 9420 + }, + { + "epoch": 0.03231377601721574, + "grad_norm": 1.2065699100494385, + "learning_rate": 1e-05, + "loss": 0.2359, + "step": 9430 + }, + { + "epoch": 0.03234804301193177, + "grad_norm": 1.1399251222610474, + "learning_rate": 1e-05, + "loss": 0.2507, + "step": 9440 + }, + { + "epoch": 0.0323823100066478, + "grad_norm": 1.070038914680481, + "learning_rate": 1e-05, + "loss": 0.2732, + "step": 9450 + }, + { + "epoch": 0.03241657700136383, + "grad_norm": 1.0940920114517212, + "learning_rate": 1e-05, + "loss": 0.2609, + "step": 9460 + }, + { + "epoch": 0.03245084399607986, + "grad_norm": 1.2042659521102905, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 9470 + }, + { + "epoch": 0.03248511099079589, + "grad_norm": 1.0865731239318848, + "learning_rate": 1e-05, + "loss": 0.2693, + "step": 9480 + }, + { + "epoch": 0.03251937798551192, + "grad_norm": 1.0884779691696167, + "learning_rate": 1e-05, + "loss": 0.2303, + "step": 9490 + }, + { + "epoch": 0.03255364498022795, + "grad_norm": 1.0557243824005127, + "learning_rate": 1e-05, + "loss": 0.2611, + "step": 9500 + }, + { + "epoch": 0.03255364498022795, + "eval_cer": 13.300405715293703, + "eval_loss": 0.2491595298051834, + "eval_normalized_cer": 9.722222222222223, + "eval_runtime": 227.5398, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 0.035, + "step": 9500 + }, + { + "epoch": 0.03258791197494398, + "grad_norm": 1.1241854429244995, + "learning_rate": 1e-05, + "loss": 0.2531, + "step": 9510 + }, + { + "epoch": 0.03262217896966, + "grad_norm": 1.091977834701538, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 9520 + }, + { + "epoch": 0.03265644596437603, + "grad_norm": 1.0969996452331543, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 9530 + }, + { + "epoch": 0.03269071295909206, + "grad_norm": 1.1597386598587036, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 9540 + }, + { + "epoch": 0.03272497995380809, + "grad_norm": 1.0741667747497559, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 9550 + }, + { + "epoch": 0.03275924694852412, + "grad_norm": 1.1600459814071655, + "learning_rate": 1e-05, + "loss": 0.2476, + "step": 9560 + }, + { + "epoch": 0.03279351394324015, + "grad_norm": 1.0636577606201172, + "learning_rate": 1e-05, + "loss": 0.2323, + "step": 9570 + }, + { + "epoch": 0.03282778093795618, + "grad_norm": 1.0010998249053955, + "learning_rate": 1e-05, + "loss": 0.2436, + "step": 9580 + }, + { + "epoch": 0.03286204793267221, + "grad_norm": 1.1286782026290894, + "learning_rate": 1e-05, + "loss": 0.2585, + "step": 9590 + }, + { + "epoch": 0.03289631492738824, + "grad_norm": 1.1575970649719238, + "learning_rate": 1e-05, + "loss": 0.2529, + "step": 9600 + }, + { + "epoch": 0.03293058192210427, + "grad_norm": 1.2235829830169678, + "learning_rate": 1e-05, + "loss": 0.2617, + "step": 9610 + }, + { + "epoch": 0.0329648489168203, + "grad_norm": 1.203371286392212, + "learning_rate": 1e-05, + "loss": 0.2299, + "step": 9620 + }, + { + "epoch": 0.03299911591153633, + "grad_norm": 1.2438814640045166, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 9630 + }, + { + "epoch": 0.03303338290625236, + "grad_norm": 1.2102582454681396, + "learning_rate": 1e-05, + "loss": 0.2415, + "step": 9640 + }, + { + "epoch": 0.03306764990096839, + "grad_norm": 0.9984006285667419, + "learning_rate": 1e-05, + "loss": 0.2633, + "step": 9650 + }, + { + "epoch": 0.03310191689568442, + "grad_norm": 1.0693376064300537, + "learning_rate": 1e-05, + "loss": 0.2394, + "step": 9660 + }, + { + "epoch": 0.03313618389040045, + "grad_norm": 1.2770649194717407, + "learning_rate": 1e-05, + "loss": 0.2577, + "step": 9670 + }, + { + "epoch": 0.03317045088511647, + "grad_norm": 1.3751499652862549, + "learning_rate": 1e-05, + "loss": 0.2595, + "step": 9680 + }, + { + "epoch": 0.0332047178798325, + "grad_norm": 1.247056484222412, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 9690 + }, + { + "epoch": 0.03323898487454853, + "grad_norm": 1.2248870134353638, + "learning_rate": 1e-05, + "loss": 0.2609, + "step": 9700 + }, + { + "epoch": 0.03327325186926456, + "grad_norm": 1.0467997789382935, + "learning_rate": 1e-05, + "loss": 0.2353, + "step": 9710 + }, + { + "epoch": 0.03330751886398059, + "grad_norm": 1.2705328464508057, + "learning_rate": 1e-05, + "loss": 0.2483, + "step": 9720 + }, + { + "epoch": 0.03334178585869662, + "grad_norm": 1.0360983610153198, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 9730 + }, + { + "epoch": 0.03337605285341265, + "grad_norm": 1.1409697532653809, + "learning_rate": 1e-05, + "loss": 0.2506, + "step": 9740 + }, + { + "epoch": 0.03341031984812868, + "grad_norm": 1.0656344890594482, + "learning_rate": 1e-05, + "loss": 0.252, + "step": 9750 + }, + { + "epoch": 0.03344458684284471, + "grad_norm": 1.1420108079910278, + "learning_rate": 1e-05, + "loss": 0.2383, + "step": 9760 + }, + { + "epoch": 0.03347885383756074, + "grad_norm": 1.0792863368988037, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 9770 + }, + { + "epoch": 0.03351312083227677, + "grad_norm": 1.1297261714935303, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 9780 + }, + { + "epoch": 0.0335473878269928, + "grad_norm": 1.2627495527267456, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 9790 + }, + { + "epoch": 0.03358165482170883, + "grad_norm": 0.982812762260437, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 9800 + }, + { + "epoch": 0.03361592181642486, + "grad_norm": 1.048766016960144, + "learning_rate": 1e-05, + "loss": 0.2531, + "step": 9810 + }, + { + "epoch": 0.03365018881114089, + "grad_norm": 1.141780972480774, + "learning_rate": 1e-05, + "loss": 0.2271, + "step": 9820 + }, + { + "epoch": 0.03368445580585692, + "grad_norm": 1.101762056350708, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 9830 + }, + { + "epoch": 0.03371872280057294, + "grad_norm": 1.0150196552276611, + "learning_rate": 1e-05, + "loss": 0.2404, + "step": 9840 + }, + { + "epoch": 0.03375298979528897, + "grad_norm": 1.1550086736679077, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 9850 + }, + { + "epoch": 0.033787256790005, + "grad_norm": 1.1246519088745117, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 9860 + }, + { + "epoch": 0.03382152378472103, + "grad_norm": 0.9673643708229065, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 9870 + }, + { + "epoch": 0.03385579077943706, + "grad_norm": 1.019649863243103, + "learning_rate": 1e-05, + "loss": 0.2407, + "step": 9880 + }, + { + "epoch": 0.03389005777415309, + "grad_norm": 1.1477577686309814, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 9890 + }, + { + "epoch": 0.03392432476886912, + "grad_norm": 0.9760174751281738, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 9900 + }, + { + "epoch": 0.03395859176358515, + "grad_norm": 1.1654585599899292, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 9910 + }, + { + "epoch": 0.03399285875830118, + "grad_norm": 1.0818895101547241, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 9920 + }, + { + "epoch": 0.03402712575301721, + "grad_norm": 1.1635690927505493, + "learning_rate": 1e-05, + "loss": 0.2539, + "step": 9930 + }, + { + "epoch": 0.03406139274773324, + "grad_norm": 1.0819408893585205, + "learning_rate": 1e-05, + "loss": 0.2352, + "step": 9940 + }, + { + "epoch": 0.03409565974244927, + "grad_norm": 1.0151749849319458, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 9950 + }, + { + "epoch": 0.0341299267371653, + "grad_norm": 1.092203140258789, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 9960 + }, + { + "epoch": 0.03416419373188133, + "grad_norm": 1.1004047393798828, + "learning_rate": 1e-05, + "loss": 0.2375, + "step": 9970 + }, + { + "epoch": 0.03419846072659736, + "grad_norm": 1.2596560716629028, + "learning_rate": 1e-05, + "loss": 0.2601, + "step": 9980 + }, + { + "epoch": 0.03423272772131339, + "grad_norm": 1.0936402082443237, + "learning_rate": 1e-05, + "loss": 0.2451, + "step": 9990 + }, + { + "epoch": 0.03426699471602941, + "grad_norm": 1.1660488843917847, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 10000 + }, + { + "epoch": 0.03426699471602941, + "eval_cer": 12.656553183983066, + "eval_loss": 0.24734708666801453, + "eval_normalized_cer": 8.912869704236611, + "eval_runtime": 226.9924, + "eval_samples_per_second": 2.256, + "eval_steps_per_second": 0.035, + "step": 10000 + }, + { + "epoch": 0.03430126171074544, + "grad_norm": 1.0807442665100098, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 10010 + }, + { + "epoch": 0.03433552870546147, + "grad_norm": 1.17780339717865, + "learning_rate": 1e-05, + "loss": 0.2685, + "step": 10020 + }, + { + "epoch": 0.0343697957001775, + "grad_norm": 0.98011314868927, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 10030 + }, + { + "epoch": 0.03440406269489353, + "grad_norm": 1.3025845289230347, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 10040 + }, + { + "epoch": 0.03443832968960956, + "grad_norm": 1.4656189680099487, + "learning_rate": 1e-05, + "loss": 0.2618, + "step": 10050 + }, + { + "epoch": 0.03447259668432559, + "grad_norm": 1.2372117042541504, + "learning_rate": 1e-05, + "loss": 0.2545, + "step": 10060 + }, + { + "epoch": 0.03450686367904162, + "grad_norm": 1.1028844118118286, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 10070 + }, + { + "epoch": 0.03454113067375765, + "grad_norm": 1.2402809858322144, + "learning_rate": 1e-05, + "loss": 0.2507, + "step": 10080 + }, + { + "epoch": 0.03457539766847368, + "grad_norm": 1.1127125024795532, + "learning_rate": 1e-05, + "loss": 0.2634, + "step": 10090 + }, + { + "epoch": 0.03460966466318971, + "grad_norm": 1.1486737728118896, + "learning_rate": 1e-05, + "loss": 0.2495, + "step": 10100 + }, + { + "epoch": 0.03464393165790574, + "grad_norm": 1.0792872905731201, + "learning_rate": 1e-05, + "loss": 0.2347, + "step": 10110 + }, + { + "epoch": 0.03467819865262177, + "grad_norm": 1.0346540212631226, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 10120 + }, + { + "epoch": 0.0347124656473378, + "grad_norm": 1.165614128112793, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 10130 + }, + { + "epoch": 0.03474673264205383, + "grad_norm": 1.1619865894317627, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 10140 + }, + { + "epoch": 0.03478099963676986, + "grad_norm": 1.2419089078903198, + "learning_rate": 1e-05, + "loss": 0.2652, + "step": 10150 + }, + { + "epoch": 0.03481526663148589, + "grad_norm": 1.0978246927261353, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 10160 + }, + { + "epoch": 0.03484953362620191, + "grad_norm": 0.9916633367538452, + "learning_rate": 1e-05, + "loss": 0.2603, + "step": 10170 + }, + { + "epoch": 0.03488380062091794, + "grad_norm": 1.026318907737732, + "learning_rate": 1e-05, + "loss": 0.2564, + "step": 10180 + }, + { + "epoch": 0.03491806761563397, + "grad_norm": 1.1151725053787231, + "learning_rate": 1e-05, + "loss": 0.2587, + "step": 10190 + }, + { + "epoch": 0.03495233461035, + "grad_norm": 1.1420174837112427, + "learning_rate": 1e-05, + "loss": 0.2695, + "step": 10200 + }, + { + "epoch": 0.03498660160506603, + "grad_norm": 1.0311987400054932, + "learning_rate": 1e-05, + "loss": 0.2574, + "step": 10210 + }, + { + "epoch": 0.03502086859978206, + "grad_norm": 0.9759404063224792, + "learning_rate": 1e-05, + "loss": 0.2398, + "step": 10220 + }, + { + "epoch": 0.03505513559449809, + "grad_norm": 1.5086033344268799, + "learning_rate": 1e-05, + "loss": 0.2589, + "step": 10230 + }, + { + "epoch": 0.03508940258921412, + "grad_norm": 1.167893409729004, + "learning_rate": 1e-05, + "loss": 0.2536, + "step": 10240 + }, + { + "epoch": 0.03512366958393015, + "grad_norm": 1.0426411628723145, + "learning_rate": 1e-05, + "loss": 0.237, + "step": 10250 + }, + { + "epoch": 0.03515793657864618, + "grad_norm": 1.4766713380813599, + "learning_rate": 1e-05, + "loss": 0.2363, + "step": 10260 + }, + { + "epoch": 0.03519220357336221, + "grad_norm": 1.019641399383545, + "learning_rate": 1e-05, + "loss": 0.2281, + "step": 10270 + }, + { + "epoch": 0.03522647056807824, + "grad_norm": 1.1627217531204224, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 10280 + }, + { + "epoch": 0.03526073756279427, + "grad_norm": 1.165414571762085, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 10290 + }, + { + "epoch": 0.0352950045575103, + "grad_norm": 1.1816645860671997, + "learning_rate": 1e-05, + "loss": 0.2471, + "step": 10300 + }, + { + "epoch": 0.03532927155222633, + "grad_norm": 1.0116764307022095, + "learning_rate": 1e-05, + "loss": 0.226, + "step": 10310 + }, + { + "epoch": 0.03536353854694236, + "grad_norm": 1.2257869243621826, + "learning_rate": 1e-05, + "loss": 0.2441, + "step": 10320 + }, + { + "epoch": 0.03539780554165838, + "grad_norm": 1.1971989870071411, + "learning_rate": 1e-05, + "loss": 0.2268, + "step": 10330 + }, + { + "epoch": 0.03543207253637441, + "grad_norm": 1.057354211807251, + "learning_rate": 1e-05, + "loss": 0.2142, + "step": 10340 + }, + { + "epoch": 0.03546633953109044, + "grad_norm": 1.2593644857406616, + "learning_rate": 1e-05, + "loss": 0.2149, + "step": 10350 + }, + { + "epoch": 0.03550060652580647, + "grad_norm": 1.0903648138046265, + "learning_rate": 1e-05, + "loss": 0.2494, + "step": 10360 + }, + { + "epoch": 0.0355348735205225, + "grad_norm": 1.2079240083694458, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 10370 + }, + { + "epoch": 0.03556914051523853, + "grad_norm": 1.206696629524231, + "learning_rate": 1e-05, + "loss": 0.2142, + "step": 10380 + }, + { + "epoch": 0.03560340750995456, + "grad_norm": 1.2176302671432495, + "learning_rate": 1e-05, + "loss": 0.2277, + "step": 10390 + }, + { + "epoch": 0.03563767450467059, + "grad_norm": 0.9780252575874329, + "learning_rate": 1e-05, + "loss": 0.2089, + "step": 10400 + }, + { + "epoch": 0.03567194149938662, + "grad_norm": 1.1160544157028198, + "learning_rate": 1e-05, + "loss": 0.2268, + "step": 10410 + }, + { + "epoch": 0.03570620849410265, + "grad_norm": 1.1231842041015625, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 10420 + }, + { + "epoch": 0.03574047548881868, + "grad_norm": 1.142675518989563, + "learning_rate": 1e-05, + "loss": 0.2386, + "step": 10430 + }, + { + "epoch": 0.03577474248353471, + "grad_norm": 1.1563806533813477, + "learning_rate": 1e-05, + "loss": 0.2217, + "step": 10440 + }, + { + "epoch": 0.03580900947825074, + "grad_norm": 1.0655251741409302, + "learning_rate": 1e-05, + "loss": 0.2207, + "step": 10450 + }, + { + "epoch": 0.03584327647296677, + "grad_norm": 1.1469309329986572, + "learning_rate": 1e-05, + "loss": 0.2211, + "step": 10460 + }, + { + "epoch": 0.0358775434676828, + "grad_norm": 1.182896375656128, + "learning_rate": 1e-05, + "loss": 0.2316, + "step": 10470 + }, + { + "epoch": 0.03591181046239883, + "grad_norm": 1.018953800201416, + "learning_rate": 1e-05, + "loss": 0.2308, + "step": 10480 + }, + { + "epoch": 0.03594607745711485, + "grad_norm": 1.1578072309494019, + "learning_rate": 1e-05, + "loss": 0.2272, + "step": 10490 + }, + { + "epoch": 0.03598034445183088, + "grad_norm": 1.1483505964279175, + "learning_rate": 1e-05, + "loss": 0.2469, + "step": 10500 + }, + { + "epoch": 0.03598034445183088, + "eval_cer": 13.026988886928912, + "eval_loss": 0.24956555664539337, + "eval_normalized_cer": 9.442446043165468, + "eval_runtime": 227.6026, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 0.035, + "step": 10500 + }, + { + "epoch": 0.03601461144654691, + "grad_norm": 1.1269420385360718, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 10510 + }, + { + "epoch": 0.03604887844126294, + "grad_norm": 1.0399614572525024, + "learning_rate": 1e-05, + "loss": 0.2473, + "step": 10520 + }, + { + "epoch": 0.03608314543597897, + "grad_norm": 1.11722731590271, + "learning_rate": 1e-05, + "loss": 0.2566, + "step": 10530 + }, + { + "epoch": 0.036117412430695, + "grad_norm": 1.0773251056671143, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 10540 + }, + { + "epoch": 0.03615167942541103, + "grad_norm": 1.0123059749603271, + "learning_rate": 1e-05, + "loss": 0.2527, + "step": 10550 + }, + { + "epoch": 0.03618594642012706, + "grad_norm": 1.14670991897583, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 10560 + }, + { + "epoch": 0.03622021341484309, + "grad_norm": 1.021543025970459, + "learning_rate": 1e-05, + "loss": 0.2314, + "step": 10570 + }, + { + "epoch": 0.03625448040955912, + "grad_norm": 1.1485329866409302, + "learning_rate": 1e-05, + "loss": 0.2603, + "step": 10580 + }, + { + "epoch": 0.03628874740427515, + "grad_norm": 1.239241600036621, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 10590 + }, + { + "epoch": 0.03632301439899118, + "grad_norm": 1.2692322731018066, + "learning_rate": 1e-05, + "loss": 0.2411, + "step": 10600 + }, + { + "epoch": 0.03635728139370721, + "grad_norm": 1.1440175771713257, + "learning_rate": 1e-05, + "loss": 0.256, + "step": 10610 + }, + { + "epoch": 0.03639154838842324, + "grad_norm": 1.0153664350509644, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 10620 + }, + { + "epoch": 0.03642581538313927, + "grad_norm": 1.0046017169952393, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 10630 + }, + { + "epoch": 0.0364600823778553, + "grad_norm": 1.023366928100586, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 10640 + }, + { + "epoch": 0.03649434937257132, + "grad_norm": 1.1663336753845215, + "learning_rate": 1e-05, + "loss": 0.2476, + "step": 10650 + }, + { + "epoch": 0.03652861636728735, + "grad_norm": 1.2302120923995972, + "learning_rate": 1e-05, + "loss": 0.2469, + "step": 10660 + }, + { + "epoch": 0.03656288336200338, + "grad_norm": 1.0369274616241455, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 10670 + }, + { + "epoch": 0.03659715035671941, + "grad_norm": 1.3913209438323975, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 10680 + }, + { + "epoch": 0.03663141735143544, + "grad_norm": 1.0193636417388916, + "learning_rate": 1e-05, + "loss": 0.25, + "step": 10690 + }, + { + "epoch": 0.03666568434615147, + "grad_norm": 1.2572286128997803, + "learning_rate": 1e-05, + "loss": 0.2441, + "step": 10700 + }, + { + "epoch": 0.0366999513408675, + "grad_norm": 1.1679438352584839, + "learning_rate": 1e-05, + "loss": 0.2462, + "step": 10710 + }, + { + "epoch": 0.03673421833558353, + "grad_norm": 1.1959030628204346, + "learning_rate": 1e-05, + "loss": 0.2641, + "step": 10720 + }, + { + "epoch": 0.03676848533029956, + "grad_norm": 1.1327241659164429, + "learning_rate": 1e-05, + "loss": 0.2538, + "step": 10730 + }, + { + "epoch": 0.03680275232501559, + "grad_norm": 1.0999104976654053, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 10740 + }, + { + "epoch": 0.03683701931973162, + "grad_norm": 1.1578527688980103, + "learning_rate": 1e-05, + "loss": 0.2506, + "step": 10750 + }, + { + "epoch": 0.03687128631444765, + "grad_norm": 1.243034839630127, + "learning_rate": 1e-05, + "loss": 0.2404, + "step": 10760 + }, + { + "epoch": 0.03690555330916368, + "grad_norm": 1.1157968044281006, + "learning_rate": 1e-05, + "loss": 0.2597, + "step": 10770 + }, + { + "epoch": 0.03693982030387971, + "grad_norm": 1.0121145248413086, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 10780 + }, + { + "epoch": 0.03697408729859574, + "grad_norm": 1.1467009782791138, + "learning_rate": 1e-05, + "loss": 0.2714, + "step": 10790 + }, + { + "epoch": 0.03700835429331177, + "grad_norm": 1.1445188522338867, + "learning_rate": 1e-05, + "loss": 0.2652, + "step": 10800 + }, + { + "epoch": 0.0370426212880278, + "grad_norm": 0.9861304759979248, + "learning_rate": 1e-05, + "loss": 0.2408, + "step": 10810 + }, + { + "epoch": 0.037076888282743824, + "grad_norm": 1.1524399518966675, + "learning_rate": 1e-05, + "loss": 0.2645, + "step": 10820 + }, + { + "epoch": 0.037111155277459854, + "grad_norm": 1.4263722896575928, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 10830 + }, + { + "epoch": 0.03714542227217588, + "grad_norm": 1.057739019393921, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 10840 + }, + { + "epoch": 0.03717968926689191, + "grad_norm": 1.232234239578247, + "learning_rate": 1e-05, + "loss": 0.2605, + "step": 10850 + }, + { + "epoch": 0.03721395626160794, + "grad_norm": 1.2277271747589111, + "learning_rate": 1e-05, + "loss": 0.2646, + "step": 10860 + }, + { + "epoch": 0.03724822325632397, + "grad_norm": 1.1447609663009644, + "learning_rate": 1e-05, + "loss": 0.2508, + "step": 10870 + }, + { + "epoch": 0.03728249025104, + "grad_norm": 1.5821764469146729, + "learning_rate": 1e-05, + "loss": 0.259, + "step": 10880 + }, + { + "epoch": 0.03731675724575603, + "grad_norm": 1.0948309898376465, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 10890 + }, + { + "epoch": 0.03735102424047206, + "grad_norm": 1.0639638900756836, + "learning_rate": 1e-05, + "loss": 0.2796, + "step": 10900 + }, + { + "epoch": 0.03738529123518809, + "grad_norm": 1.0613322257995605, + "learning_rate": 1e-05, + "loss": 0.2606, + "step": 10910 + }, + { + "epoch": 0.03741955822990412, + "grad_norm": 1.1825395822525024, + "learning_rate": 1e-05, + "loss": 0.2382, + "step": 10920 + }, + { + "epoch": 0.03745382522462015, + "grad_norm": 1.1198760271072388, + "learning_rate": 1e-05, + "loss": 0.2601, + "step": 10930 + }, + { + "epoch": 0.03748809221933618, + "grad_norm": 0.9922842383384705, + "learning_rate": 1e-05, + "loss": 0.2567, + "step": 10940 + }, + { + "epoch": 0.03752235921405221, + "grad_norm": 1.0075231790542603, + "learning_rate": 1e-05, + "loss": 0.2373, + "step": 10950 + }, + { + "epoch": 0.03755662620876824, + "grad_norm": 1.2739824056625366, + "learning_rate": 1e-05, + "loss": 0.2571, + "step": 10960 + }, + { + "epoch": 0.03759089320348427, + "grad_norm": 1.049249529838562, + "learning_rate": 1e-05, + "loss": 0.2505, + "step": 10970 + }, + { + "epoch": 0.037625160198200294, + "grad_norm": 1.0130621194839478, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 10980 + }, + { + "epoch": 0.037659427192916324, + "grad_norm": 1.143740177154541, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 10990 + }, + { + "epoch": 0.037693694187632354, + "grad_norm": 1.1482734680175781, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 11000 + }, + { + "epoch": 0.037693694187632354, + "eval_cer": 13.150467454577527, + "eval_loss": 0.24854739010334015, + "eval_normalized_cer": 9.492406075139888, + "eval_runtime": 227.9, + "eval_samples_per_second": 2.247, + "eval_steps_per_second": 0.035, + "step": 11000 + }, + { + "epoch": 0.037727961182348384, + "grad_norm": 1.175059199333191, + "learning_rate": 1e-05, + "loss": 0.235, + "step": 11010 + }, + { + "epoch": 0.037762228177064414, + "grad_norm": 1.06391441822052, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 11020 + }, + { + "epoch": 0.03779649517178044, + "grad_norm": 1.1072652339935303, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 11030 + }, + { + "epoch": 0.03783076216649647, + "grad_norm": 1.0364381074905396, + "learning_rate": 1e-05, + "loss": 0.2353, + "step": 11040 + }, + { + "epoch": 0.0378650291612125, + "grad_norm": 1.120888113975525, + "learning_rate": 1e-05, + "loss": 0.2564, + "step": 11050 + }, + { + "epoch": 0.03789929615592853, + "grad_norm": 1.1769522428512573, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 11060 + }, + { + "epoch": 0.03793356315064456, + "grad_norm": 1.103209137916565, + "learning_rate": 1e-05, + "loss": 0.2579, + "step": 11070 + }, + { + "epoch": 0.03796783014536059, + "grad_norm": 1.191256046295166, + "learning_rate": 1e-05, + "loss": 0.258, + "step": 11080 + }, + { + "epoch": 0.03800209714007662, + "grad_norm": 1.103756070137024, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 11090 + }, + { + "epoch": 0.03803636413479265, + "grad_norm": 1.2097468376159668, + "learning_rate": 1e-05, + "loss": 0.2581, + "step": 11100 + }, + { + "epoch": 0.03807063112950868, + "grad_norm": 1.0787367820739746, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 11110 + }, + { + "epoch": 0.03810489812422471, + "grad_norm": 1.0501831769943237, + "learning_rate": 1e-05, + "loss": 0.2585, + "step": 11120 + }, + { + "epoch": 0.03813916511894074, + "grad_norm": 1.1210603713989258, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 11130 + }, + { + "epoch": 0.038173432113656765, + "grad_norm": 1.0846295356750488, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 11140 + }, + { + "epoch": 0.038207699108372795, + "grad_norm": 1.176488995552063, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 11150 + }, + { + "epoch": 0.038241966103088824, + "grad_norm": 1.034157395362854, + "learning_rate": 1e-05, + "loss": 0.2701, + "step": 11160 + }, + { + "epoch": 0.038276233097804854, + "grad_norm": 1.0763425827026367, + "learning_rate": 1e-05, + "loss": 0.2327, + "step": 11170 + }, + { + "epoch": 0.038310500092520884, + "grad_norm": 1.010324478149414, + "learning_rate": 1e-05, + "loss": 0.2506, + "step": 11180 + }, + { + "epoch": 0.038344767087236914, + "grad_norm": 1.0999796390533447, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 11190 + }, + { + "epoch": 0.038379034081952944, + "grad_norm": 1.2314294576644897, + "learning_rate": 1e-05, + "loss": 0.2413, + "step": 11200 + }, + { + "epoch": 0.038413301076668974, + "grad_norm": 1.109063982963562, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 11210 + }, + { + "epoch": 0.038447568071385003, + "grad_norm": 1.0760470628738403, + "learning_rate": 1e-05, + "loss": 0.2572, + "step": 11220 + }, + { + "epoch": 0.03848183506610103, + "grad_norm": 1.2139952182769775, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 11230 + }, + { + "epoch": 0.03851610206081706, + "grad_norm": 1.0825960636138916, + "learning_rate": 1e-05, + "loss": 0.2446, + "step": 11240 + }, + { + "epoch": 0.03855036905553309, + "grad_norm": 1.1650110483169556, + "learning_rate": 1e-05, + "loss": 0.2486, + "step": 11250 + }, + { + "epoch": 0.03858463605024912, + "grad_norm": 1.074236512184143, + "learning_rate": 1e-05, + "loss": 0.2623, + "step": 11260 + }, + { + "epoch": 0.03861890304496515, + "grad_norm": 1.0651731491088867, + "learning_rate": 1e-05, + "loss": 0.2409, + "step": 11270 + }, + { + "epoch": 0.03865317003968118, + "grad_norm": 1.0689282417297363, + "learning_rate": 1e-05, + "loss": 0.2329, + "step": 11280 + }, + { + "epoch": 0.03868743703439721, + "grad_norm": 1.1548572778701782, + "learning_rate": 1e-05, + "loss": 0.2572, + "step": 11290 + }, + { + "epoch": 0.038721704029113235, + "grad_norm": 1.1205992698669434, + "learning_rate": 1e-05, + "loss": 0.2424, + "step": 11300 + }, + { + "epoch": 0.038755971023829265, + "grad_norm": 1.0806999206542969, + "learning_rate": 1e-05, + "loss": 0.2336, + "step": 11310 + }, + { + "epoch": 0.038790238018545295, + "grad_norm": 1.0330880880355835, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 11320 + }, + { + "epoch": 0.038824505013261325, + "grad_norm": 1.2580816745758057, + "learning_rate": 1e-05, + "loss": 0.2297, + "step": 11330 + }, + { + "epoch": 0.038858772007977355, + "grad_norm": 1.3312656879425049, + "learning_rate": 1e-05, + "loss": 0.2478, + "step": 11340 + }, + { + "epoch": 0.038893039002693384, + "grad_norm": 1.0043836832046509, + "learning_rate": 1e-05, + "loss": 0.2388, + "step": 11350 + }, + { + "epoch": 0.038927305997409414, + "grad_norm": 1.0665231943130493, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 11360 + }, + { + "epoch": 0.038961572992125444, + "grad_norm": 1.1114041805267334, + "learning_rate": 1e-05, + "loss": 0.2407, + "step": 11370 + }, + { + "epoch": 0.038995839986841474, + "grad_norm": 1.043134093284607, + "learning_rate": 1e-05, + "loss": 0.2352, + "step": 11380 + }, + { + "epoch": 0.039030106981557504, + "grad_norm": 1.1435351371765137, + "learning_rate": 1e-05, + "loss": 0.2347, + "step": 11390 + }, + { + "epoch": 0.039064373976273534, + "grad_norm": 1.2625036239624023, + "learning_rate": 1e-05, + "loss": 0.2515, + "step": 11400 + }, + { + "epoch": 0.039098640970989564, + "grad_norm": 1.101953148841858, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 11410 + }, + { + "epoch": 0.03913290796570559, + "grad_norm": 1.0906771421432495, + "learning_rate": 1e-05, + "loss": 0.2145, + "step": 11420 + }, + { + "epoch": 0.03916717496042162, + "grad_norm": 1.2161511182785034, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 11430 + }, + { + "epoch": 0.03920144195513765, + "grad_norm": 1.0494539737701416, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 11440 + }, + { + "epoch": 0.03923570894985368, + "grad_norm": 1.098476529121399, + "learning_rate": 1e-05, + "loss": 0.2392, + "step": 11450 + }, + { + "epoch": 0.03926997594456971, + "grad_norm": 1.1904308795928955, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 11460 + }, + { + "epoch": 0.039304242939285736, + "grad_norm": 1.1967114210128784, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 11470 + }, + { + "epoch": 0.039338509934001765, + "grad_norm": 1.2335082292556763, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 11480 + }, + { + "epoch": 0.039372776928717795, + "grad_norm": 1.0949335098266602, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 11490 + }, + { + "epoch": 0.039407043923433825, + "grad_norm": 0.9655303955078125, + "learning_rate": 1e-05, + "loss": 0.2298, + "step": 11500 + }, + { + "epoch": 0.039407043923433825, + "eval_cer": 13.238666431469396, + "eval_loss": 0.24733339250087738, + "eval_normalized_cer": 9.532374100719425, + "eval_runtime": 228.8524, + "eval_samples_per_second": 2.237, + "eval_steps_per_second": 0.035, + "step": 11500 + }, + { + "epoch": 0.039441310918149855, + "grad_norm": 1.303244709968567, + "learning_rate": 1e-05, + "loss": 0.2312, + "step": 11510 + }, + { + "epoch": 0.039475577912865885, + "grad_norm": 1.2272698879241943, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 11520 + }, + { + "epoch": 0.039509844907581915, + "grad_norm": 1.2555683851242065, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 11530 + }, + { + "epoch": 0.039544111902297945, + "grad_norm": 1.2972025871276855, + "learning_rate": 1e-05, + "loss": 0.2637, + "step": 11540 + }, + { + "epoch": 0.039578378897013974, + "grad_norm": 0.9733885526657104, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 11550 + }, + { + "epoch": 0.039612645891730004, + "grad_norm": 1.1637110710144043, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 11560 + }, + { + "epoch": 0.039646912886446034, + "grad_norm": 1.2355756759643555, + "learning_rate": 1e-05, + "loss": 0.2407, + "step": 11570 + }, + { + "epoch": 0.039681179881162064, + "grad_norm": 1.1079312562942505, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 11580 + }, + { + "epoch": 0.039715446875878094, + "grad_norm": 1.13614821434021, + "learning_rate": 1e-05, + "loss": 0.2442, + "step": 11590 + }, + { + "epoch": 0.039749713870594124, + "grad_norm": 1.2050237655639648, + "learning_rate": 1e-05, + "loss": 0.2505, + "step": 11600 + }, + { + "epoch": 0.03978398086531015, + "grad_norm": 1.020393967628479, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 11610 + }, + { + "epoch": 0.03981824786002618, + "grad_norm": 1.088463544845581, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 11620 + }, + { + "epoch": 0.039852514854742206, + "grad_norm": 1.121472716331482, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 11630 + }, + { + "epoch": 0.039886781849458236, + "grad_norm": 1.109485149383545, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 11640 + }, + { + "epoch": 0.039921048844174266, + "grad_norm": 1.0824321508407593, + "learning_rate": 1e-05, + "loss": 0.2395, + "step": 11650 + }, + { + "epoch": 0.039955315838890296, + "grad_norm": 1.1438390016555786, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 11660 + }, + { + "epoch": 0.039989582833606326, + "grad_norm": 1.005821943283081, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 11670 + }, + { + "epoch": 0.040023849828322355, + "grad_norm": 1.187921166419983, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 11680 + }, + { + "epoch": 0.040058116823038385, + "grad_norm": 1.0112143754959106, + "learning_rate": 1e-05, + "loss": 0.2555, + "step": 11690 + }, + { + "epoch": 0.040092383817754415, + "grad_norm": 1.1568365097045898, + "learning_rate": 1e-05, + "loss": 0.2483, + "step": 11700 + }, + { + "epoch": 0.040126650812470445, + "grad_norm": 1.1201119422912598, + "learning_rate": 1e-05, + "loss": 0.256, + "step": 11710 + }, + { + "epoch": 0.040160917807186475, + "grad_norm": 1.1281081438064575, + "learning_rate": 1e-05, + "loss": 0.2462, + "step": 11720 + }, + { + "epoch": 0.040195184801902505, + "grad_norm": 0.9730721116065979, + "learning_rate": 1e-05, + "loss": 0.2534, + "step": 11730 + }, + { + "epoch": 0.040229451796618534, + "grad_norm": 1.1069108247756958, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 11740 + }, + { + "epoch": 0.040263718791334564, + "grad_norm": 1.1095935106277466, + "learning_rate": 1e-05, + "loss": 0.2448, + "step": 11750 + }, + { + "epoch": 0.040297985786050594, + "grad_norm": 0.9690611958503723, + "learning_rate": 1e-05, + "loss": 0.2448, + "step": 11760 + }, + { + "epoch": 0.040332252780766624, + "grad_norm": 1.0263612270355225, + "learning_rate": 1e-05, + "loss": 0.2555, + "step": 11770 + }, + { + "epoch": 0.040366519775482654, + "grad_norm": 1.0731168985366821, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 11780 + }, + { + "epoch": 0.04040078677019868, + "grad_norm": 1.1446433067321777, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 11790 + }, + { + "epoch": 0.04043505376491471, + "grad_norm": 1.0776352882385254, + "learning_rate": 1e-05, + "loss": 0.2255, + "step": 11800 + }, + { + "epoch": 0.040469320759630736, + "grad_norm": 0.9721156358718872, + "learning_rate": 1e-05, + "loss": 0.2234, + "step": 11810 + }, + { + "epoch": 0.040503587754346766, + "grad_norm": 0.9534703493118286, + "learning_rate": 1e-05, + "loss": 0.2163, + "step": 11820 + }, + { + "epoch": 0.040537854749062796, + "grad_norm": 1.0248794555664062, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 11830 + }, + { + "epoch": 0.040572121743778826, + "grad_norm": 1.1740145683288574, + "learning_rate": 1e-05, + "loss": 0.2394, + "step": 11840 + }, + { + "epoch": 0.040606388738494856, + "grad_norm": 1.1622172594070435, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 11850 + }, + { + "epoch": 0.040640655733210886, + "grad_norm": 1.0684759616851807, + "learning_rate": 1e-05, + "loss": 0.2196, + "step": 11860 + }, + { + "epoch": 0.040674922727926915, + "grad_norm": 1.024851679801941, + "learning_rate": 1e-05, + "loss": 0.2178, + "step": 11870 + }, + { + "epoch": 0.040709189722642945, + "grad_norm": 1.2293421030044556, + "learning_rate": 1e-05, + "loss": 0.2372, + "step": 11880 + }, + { + "epoch": 0.040743456717358975, + "grad_norm": 1.2226061820983887, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 11890 + }, + { + "epoch": 0.040777723712075005, + "grad_norm": 1.0775419473648071, + "learning_rate": 1e-05, + "loss": 0.221, + "step": 11900 + }, + { + "epoch": 0.040811990706791035, + "grad_norm": 1.0354384183883667, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 11910 + }, + { + "epoch": 0.040846257701507065, + "grad_norm": 0.9725399613380432, + "learning_rate": 1e-05, + "loss": 0.2312, + "step": 11920 + }, + { + "epoch": 0.040880524696223094, + "grad_norm": 0.9638645648956299, + "learning_rate": 1e-05, + "loss": 0.2238, + "step": 11930 + }, + { + "epoch": 0.040914791690939124, + "grad_norm": 1.1646082401275635, + "learning_rate": 1e-05, + "loss": 0.2339, + "step": 11940 + }, + { + "epoch": 0.04094905868565515, + "grad_norm": 1.049614429473877, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 11950 + }, + { + "epoch": 0.04098332568037118, + "grad_norm": 1.1187442541122437, + "learning_rate": 1e-05, + "loss": 0.2437, + "step": 11960 + }, + { + "epoch": 0.04101759267508721, + "grad_norm": 1.1472731828689575, + "learning_rate": 1e-05, + "loss": 0.2416, + "step": 11970 + }, + { + "epoch": 0.04105185966980324, + "grad_norm": 1.0694329738616943, + "learning_rate": 1e-05, + "loss": 0.2308, + "step": 11980 + }, + { + "epoch": 0.04108612666451927, + "grad_norm": 0.9863060116767883, + "learning_rate": 1e-05, + "loss": 0.2258, + "step": 11990 + }, + { + "epoch": 0.041120393659235296, + "grad_norm": 1.1150392293930054, + "learning_rate": 1e-05, + "loss": 0.2342, + "step": 12000 + }, + { + "epoch": 0.041120393659235296, + "eval_cer": 12.797671547010056, + "eval_loss": 0.2504700720310211, + "eval_normalized_cer": 9.152677857713828, + "eval_runtime": 227.2028, + "eval_samples_per_second": 2.253, + "eval_steps_per_second": 0.035, + "step": 12000 + }, + { + "epoch": 0.041154660653951326, + "grad_norm": 1.0455725193023682, + "learning_rate": 1e-05, + "loss": 0.2263, + "step": 12010 + }, + { + "epoch": 0.041188927648667356, + "grad_norm": 1.2993946075439453, + "learning_rate": 1e-05, + "loss": 0.2273, + "step": 12020 + }, + { + "epoch": 0.041223194643383386, + "grad_norm": 1.159058690071106, + "learning_rate": 1e-05, + "loss": 0.2259, + "step": 12030 + }, + { + "epoch": 0.041257461638099416, + "grad_norm": 1.1908732652664185, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 12040 + }, + { + "epoch": 0.041291728632815446, + "grad_norm": 1.3122719526290894, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 12050 + }, + { + "epoch": 0.041325995627531475, + "grad_norm": 1.1521992683410645, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 12060 + }, + { + "epoch": 0.041360262622247505, + "grad_norm": 1.1007260084152222, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 12070 + }, + { + "epoch": 0.041394529616963535, + "grad_norm": 1.1719632148742676, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 12080 + }, + { + "epoch": 0.041428796611679565, + "grad_norm": 1.088536024093628, + "learning_rate": 1e-05, + "loss": 0.2408, + "step": 12090 + }, + { + "epoch": 0.041463063606395595, + "grad_norm": 1.1596314907073975, + "learning_rate": 1e-05, + "loss": 0.2463, + "step": 12100 + }, + { + "epoch": 0.041497330601111625, + "grad_norm": 1.1144077777862549, + "learning_rate": 1e-05, + "loss": 0.2411, + "step": 12110 + }, + { + "epoch": 0.04153159759582765, + "grad_norm": 1.2945681810379028, + "learning_rate": 1e-05, + "loss": 0.2381, + "step": 12120 + }, + { + "epoch": 0.04156586459054368, + "grad_norm": 1.0997275114059448, + "learning_rate": 1e-05, + "loss": 0.2704, + "step": 12130 + }, + { + "epoch": 0.04160013158525971, + "grad_norm": 1.0945874452590942, + "learning_rate": 1e-05, + "loss": 0.2249, + "step": 12140 + }, + { + "epoch": 0.04163439857997574, + "grad_norm": 1.2051665782928467, + "learning_rate": 1e-05, + "loss": 0.2442, + "step": 12150 + }, + { + "epoch": 0.04166866557469177, + "grad_norm": 0.9178060293197632, + "learning_rate": 1e-05, + "loss": 0.2519, + "step": 12160 + }, + { + "epoch": 0.0417029325694078, + "grad_norm": 1.1225532293319702, + "learning_rate": 1e-05, + "loss": 0.2425, + "step": 12170 + }, + { + "epoch": 0.04173719956412383, + "grad_norm": 1.0947092771530151, + "learning_rate": 1e-05, + "loss": 0.2305, + "step": 12180 + }, + { + "epoch": 0.041771466558839856, + "grad_norm": 1.0374338626861572, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 12190 + }, + { + "epoch": 0.041805733553555886, + "grad_norm": 1.1471805572509766, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 12200 + }, + { + "epoch": 0.041840000548271916, + "grad_norm": 1.1241774559020996, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 12210 + }, + { + "epoch": 0.041874267542987946, + "grad_norm": 1.243691086769104, + "learning_rate": 1e-05, + "loss": 0.2409, + "step": 12220 + }, + { + "epoch": 0.041908534537703976, + "grad_norm": 1.1077616214752197, + "learning_rate": 1e-05, + "loss": 0.2533, + "step": 12230 + }, + { + "epoch": 0.041942801532420006, + "grad_norm": 1.0907562971115112, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 12240 + }, + { + "epoch": 0.041977068527136036, + "grad_norm": 1.3562718629837036, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 12250 + }, + { + "epoch": 0.042011335521852065, + "grad_norm": 1.0229142904281616, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 12260 + }, + { + "epoch": 0.042045602516568095, + "grad_norm": 1.0843278169631958, + "learning_rate": 1e-05, + "loss": 0.2328, + "step": 12270 + }, + { + "epoch": 0.04207986951128412, + "grad_norm": 1.2029650211334229, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 12280 + }, + { + "epoch": 0.04211413650600015, + "grad_norm": 0.9493764638900757, + "learning_rate": 1e-05, + "loss": 0.2232, + "step": 12290 + }, + { + "epoch": 0.04214840350071618, + "grad_norm": 1.2031728029251099, + "learning_rate": 1e-05, + "loss": 0.2473, + "step": 12300 + }, + { + "epoch": 0.04218267049543221, + "grad_norm": 1.1091227531433105, + "learning_rate": 1e-05, + "loss": 0.2429, + "step": 12310 + }, + { + "epoch": 0.04221693749014824, + "grad_norm": 1.1729086637496948, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 12320 + }, + { + "epoch": 0.04225120448486427, + "grad_norm": 1.0592730045318604, + "learning_rate": 1e-05, + "loss": 0.2506, + "step": 12330 + }, + { + "epoch": 0.0422854714795803, + "grad_norm": 1.2366282939910889, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 12340 + }, + { + "epoch": 0.04231973847429633, + "grad_norm": 1.12427818775177, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 12350 + }, + { + "epoch": 0.04235400546901236, + "grad_norm": 1.1663504838943481, + "learning_rate": 1e-05, + "loss": 0.2491, + "step": 12360 + }, + { + "epoch": 0.04238827246372839, + "grad_norm": 1.2383378744125366, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 12370 + }, + { + "epoch": 0.04242253945844442, + "grad_norm": 1.184813380241394, + "learning_rate": 1e-05, + "loss": 0.2498, + "step": 12380 + }, + { + "epoch": 0.042456806453160446, + "grad_norm": 1.035650610923767, + "learning_rate": 1e-05, + "loss": 0.2317, + "step": 12390 + }, + { + "epoch": 0.042491073447876476, + "grad_norm": 1.0495967864990234, + "learning_rate": 1e-05, + "loss": 0.2467, + "step": 12400 + }, + { + "epoch": 0.042525340442592506, + "grad_norm": 1.0791754722595215, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 12410 + }, + { + "epoch": 0.042559607437308536, + "grad_norm": 1.1513383388519287, + "learning_rate": 1e-05, + "loss": 0.2599, + "step": 12420 + }, + { + "epoch": 0.042593874432024566, + "grad_norm": 1.1093658208847046, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 12430 + }, + { + "epoch": 0.04262814142674059, + "grad_norm": 1.0504255294799805, + "learning_rate": 1e-05, + "loss": 0.2355, + "step": 12440 + }, + { + "epoch": 0.04266240842145662, + "grad_norm": 1.121837854385376, + "learning_rate": 1e-05, + "loss": 0.2309, + "step": 12450 + }, + { + "epoch": 0.04269667541617265, + "grad_norm": 1.2266592979431152, + "learning_rate": 1e-05, + "loss": 0.2384, + "step": 12460 + }, + { + "epoch": 0.04273094241088868, + "grad_norm": 1.0912328958511353, + "learning_rate": 1e-05, + "loss": 0.2392, + "step": 12470 + }, + { + "epoch": 0.04276520940560471, + "grad_norm": 1.1504424810409546, + "learning_rate": 1e-05, + "loss": 0.2716, + "step": 12480 + }, + { + "epoch": 0.04279947640032074, + "grad_norm": 1.011088490486145, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 12490 + }, + { + "epoch": 0.04283374339503677, + "grad_norm": 1.2423217296600342, + "learning_rate": 1e-05, + "loss": 0.2562, + "step": 12500 + }, + { + "epoch": 0.04283374339503677, + "eval_cer": 12.92115011465867, + "eval_loss": 0.24911069869995117, + "eval_normalized_cer": 9.452438049560353, + "eval_runtime": 228.1651, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.035, + "step": 12500 + }, + { + "epoch": 0.0428680103897528, + "grad_norm": 1.4834926128387451, + "learning_rate": 1e-05, + "loss": 0.2354, + "step": 12510 + }, + { + "epoch": 0.04290227738446883, + "grad_norm": 1.26629638671875, + "learning_rate": 1e-05, + "loss": 0.2569, + "step": 12520 + }, + { + "epoch": 0.04293654437918486, + "grad_norm": 1.204516053199768, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 12530 + }, + { + "epoch": 0.04297081137390089, + "grad_norm": 1.0527433156967163, + "learning_rate": 1e-05, + "loss": 0.2503, + "step": 12540 + }, + { + "epoch": 0.04300507836861692, + "grad_norm": 1.0310479402542114, + "learning_rate": 1e-05, + "loss": 0.2289, + "step": 12550 + }, + { + "epoch": 0.04303934536333295, + "grad_norm": 1.2252111434936523, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 12560 + }, + { + "epoch": 0.04307361235804898, + "grad_norm": 1.0729095935821533, + "learning_rate": 1e-05, + "loss": 0.2302, + "step": 12570 + }, + { + "epoch": 0.043107879352765006, + "grad_norm": 1.000106930732727, + "learning_rate": 1e-05, + "loss": 0.2192, + "step": 12580 + }, + { + "epoch": 0.043142146347481036, + "grad_norm": 1.0674782991409302, + "learning_rate": 1e-05, + "loss": 0.2334, + "step": 12590 + }, + { + "epoch": 0.04317641334219706, + "grad_norm": 1.1148403882980347, + "learning_rate": 1e-05, + "loss": 0.2402, + "step": 12600 + }, + { + "epoch": 0.04321068033691309, + "grad_norm": 1.0144375562667847, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 12610 + }, + { + "epoch": 0.04324494733162912, + "grad_norm": 1.1123058795928955, + "learning_rate": 1e-05, + "loss": 0.2334, + "step": 12620 + }, + { + "epoch": 0.04327921432634515, + "grad_norm": 1.1008777618408203, + "learning_rate": 1e-05, + "loss": 0.2319, + "step": 12630 + }, + { + "epoch": 0.04331348132106118, + "grad_norm": 1.1487098932266235, + "learning_rate": 1e-05, + "loss": 0.2451, + "step": 12640 + }, + { + "epoch": 0.04334774831577721, + "grad_norm": 1.1339664459228516, + "learning_rate": 1e-05, + "loss": 0.2267, + "step": 12650 + }, + { + "epoch": 0.04338201531049324, + "grad_norm": 1.198195219039917, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 12660 + }, + { + "epoch": 0.04341628230520927, + "grad_norm": 0.9989431500434875, + "learning_rate": 1e-05, + "loss": 0.2381, + "step": 12670 + }, + { + "epoch": 0.0434505492999253, + "grad_norm": 1.4252516031265259, + "learning_rate": 1e-05, + "loss": 0.2513, + "step": 12680 + }, + { + "epoch": 0.04348481629464133, + "grad_norm": 1.1313762664794922, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 12690 + }, + { + "epoch": 0.04351908328935736, + "grad_norm": 1.0512256622314453, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 12700 + }, + { + "epoch": 0.04355335028407339, + "grad_norm": 1.158078670501709, + "learning_rate": 1e-05, + "loss": 0.2446, + "step": 12710 + }, + { + "epoch": 0.04358761727878942, + "grad_norm": 1.0620396137237549, + "learning_rate": 1e-05, + "loss": 0.2403, + "step": 12720 + }, + { + "epoch": 0.04362188427350545, + "grad_norm": 1.0640372037887573, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 12730 + }, + { + "epoch": 0.04365615126822148, + "grad_norm": 1.113105058670044, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 12740 + }, + { + "epoch": 0.04369041826293751, + "grad_norm": 1.0416456460952759, + "learning_rate": 1e-05, + "loss": 0.2424, + "step": 12750 + }, + { + "epoch": 0.04372468525765354, + "grad_norm": 1.2312722206115723, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 12760 + }, + { + "epoch": 0.04375895225236956, + "grad_norm": 1.228950023651123, + "learning_rate": 1e-05, + "loss": 0.2462, + "step": 12770 + }, + { + "epoch": 0.04379321924708559, + "grad_norm": 1.0736054182052612, + "learning_rate": 1e-05, + "loss": 0.2299, + "step": 12780 + }, + { + "epoch": 0.04382748624180162, + "grad_norm": 1.4141355752944946, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 12790 + }, + { + "epoch": 0.04386175323651765, + "grad_norm": 1.2224112749099731, + "learning_rate": 1e-05, + "loss": 0.2334, + "step": 12800 + }, + { + "epoch": 0.04389602023123368, + "grad_norm": 1.3090282678604126, + "learning_rate": 1e-05, + "loss": 0.2548, + "step": 12810 + }, + { + "epoch": 0.04393028722594971, + "grad_norm": 1.0864715576171875, + "learning_rate": 1e-05, + "loss": 0.2711, + "step": 12820 + }, + { + "epoch": 0.04396455422066574, + "grad_norm": 1.0953795909881592, + "learning_rate": 1e-05, + "loss": 0.2467, + "step": 12830 + }, + { + "epoch": 0.04399882121538177, + "grad_norm": 0.9681864976882935, + "learning_rate": 1e-05, + "loss": 0.2217, + "step": 12840 + }, + { + "epoch": 0.0440330882100978, + "grad_norm": 0.9268914461135864, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 12850 + }, + { + "epoch": 0.04406735520481383, + "grad_norm": 1.0900733470916748, + "learning_rate": 1e-05, + "loss": 0.2421, + "step": 12860 + }, + { + "epoch": 0.04410162219952986, + "grad_norm": 1.1551947593688965, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 12870 + }, + { + "epoch": 0.04413588919424589, + "grad_norm": 1.0035364627838135, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 12880 + }, + { + "epoch": 0.04417015618896192, + "grad_norm": 1.2478151321411133, + "learning_rate": 1e-05, + "loss": 0.253, + "step": 12890 + }, + { + "epoch": 0.04420442318367795, + "grad_norm": 1.1512874364852905, + "learning_rate": 1e-05, + "loss": 0.2247, + "step": 12900 + }, + { + "epoch": 0.04423869017839398, + "grad_norm": 1.2012622356414795, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 12910 + }, + { + "epoch": 0.04427295717311001, + "grad_norm": 1.1266357898712158, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 12920 + }, + { + "epoch": 0.04430722416782603, + "grad_norm": 1.11850905418396, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 12930 + }, + { + "epoch": 0.04434149116254206, + "grad_norm": 1.1375716924667358, + "learning_rate": 1e-05, + "loss": 0.2474, + "step": 12940 + }, + { + "epoch": 0.04437575815725809, + "grad_norm": 1.3423253297805786, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 12950 + }, + { + "epoch": 0.04441002515197412, + "grad_norm": 1.0608446598052979, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 12960 + }, + { + "epoch": 0.04444429214669015, + "grad_norm": 1.059899091720581, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 12970 + }, + { + "epoch": 0.04447855914140618, + "grad_norm": 1.117346167564392, + "learning_rate": 1e-05, + "loss": 0.2529, + "step": 12980 + }, + { + "epoch": 0.04451282613612221, + "grad_norm": 1.2896045446395874, + "learning_rate": 1e-05, + "loss": 0.2473, + "step": 12990 + }, + { + "epoch": 0.04454709313083824, + "grad_norm": 0.9721153974533081, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 13000 + }, + { + "epoch": 0.04454709313083824, + "eval_cer": 12.84177103545599, + "eval_loss": 0.2521709203720093, + "eval_normalized_cer": 9.152677857713828, + "eval_runtime": 226.778, + "eval_samples_per_second": 2.258, + "eval_steps_per_second": 0.035, + "step": 13000 + }, + { + "epoch": 0.04458136012555427, + "grad_norm": 1.232352375984192, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 13010 + }, + { + "epoch": 0.0446156271202703, + "grad_norm": 1.2386256456375122, + "learning_rate": 1e-05, + "loss": 0.255, + "step": 13020 + }, + { + "epoch": 0.04464989411498633, + "grad_norm": 1.2183597087860107, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 13030 + }, + { + "epoch": 0.04468416110970236, + "grad_norm": 1.166823387145996, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 13040 + }, + { + "epoch": 0.04471842810441839, + "grad_norm": 1.1574853658676147, + "learning_rate": 1e-05, + "loss": 0.2557, + "step": 13050 + }, + { + "epoch": 0.04475269509913442, + "grad_norm": 1.1207836866378784, + "learning_rate": 1e-05, + "loss": 0.2578, + "step": 13060 + }, + { + "epoch": 0.04478696209385045, + "grad_norm": 1.2590343952178955, + "learning_rate": 1e-05, + "loss": 0.2551, + "step": 13070 + }, + { + "epoch": 0.04482122908856648, + "grad_norm": 1.0984435081481934, + "learning_rate": 1e-05, + "loss": 0.2816, + "step": 13080 + }, + { + "epoch": 0.0448554960832825, + "grad_norm": 1.1435647010803223, + "learning_rate": 1e-05, + "loss": 0.2481, + "step": 13090 + }, + { + "epoch": 0.04488976307799853, + "grad_norm": 1.1446672677993774, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 13100 + }, + { + "epoch": 0.04492403007271456, + "grad_norm": 1.0957670211791992, + "learning_rate": 1e-05, + "loss": 0.2458, + "step": 13110 + }, + { + "epoch": 0.04495829706743059, + "grad_norm": 1.1321167945861816, + "learning_rate": 1e-05, + "loss": 0.2621, + "step": 13120 + }, + { + "epoch": 0.04499256406214662, + "grad_norm": 1.140914797782898, + "learning_rate": 1e-05, + "loss": 0.2376, + "step": 13130 + }, + { + "epoch": 0.04502683105686265, + "grad_norm": 1.1879481077194214, + "learning_rate": 1e-05, + "loss": 0.2457, + "step": 13140 + }, + { + "epoch": 0.04506109805157868, + "grad_norm": 1.240084171295166, + "learning_rate": 1e-05, + "loss": 0.2486, + "step": 13150 + }, + { + "epoch": 0.04509536504629471, + "grad_norm": 1.1524683237075806, + "learning_rate": 1e-05, + "loss": 0.2533, + "step": 13160 + }, + { + "epoch": 0.04512963204101074, + "grad_norm": 1.1614208221435547, + "learning_rate": 1e-05, + "loss": 0.2495, + "step": 13170 + }, + { + "epoch": 0.04516389903572677, + "grad_norm": 1.1307048797607422, + "learning_rate": 1e-05, + "loss": 0.2549, + "step": 13180 + }, + { + "epoch": 0.0451981660304428, + "grad_norm": 1.0327478647232056, + "learning_rate": 1e-05, + "loss": 0.2451, + "step": 13190 + }, + { + "epoch": 0.04523243302515883, + "grad_norm": 1.2401607036590576, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 13200 + }, + { + "epoch": 0.04526670001987486, + "grad_norm": 1.095413088798523, + "learning_rate": 1e-05, + "loss": 0.2487, + "step": 13210 + }, + { + "epoch": 0.04530096701459089, + "grad_norm": 1.2537821531295776, + "learning_rate": 1e-05, + "loss": 0.246, + "step": 13220 + }, + { + "epoch": 0.04533523400930692, + "grad_norm": 1.1861079931259155, + "learning_rate": 1e-05, + "loss": 0.2487, + "step": 13230 + }, + { + "epoch": 0.04536950100402295, + "grad_norm": 1.1059224605560303, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 13240 + }, + { + "epoch": 0.04540376799873898, + "grad_norm": 1.159122109413147, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 13250 + }, + { + "epoch": 0.045438034993455, + "grad_norm": 1.0307060480117798, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 13260 + }, + { + "epoch": 0.04547230198817103, + "grad_norm": 1.0377501249313354, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 13270 + }, + { + "epoch": 0.04550656898288706, + "grad_norm": 1.124543309211731, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 13280 + }, + { + "epoch": 0.04554083597760309, + "grad_norm": 1.0829116106033325, + "learning_rate": 1e-05, + "loss": 0.2417, + "step": 13290 + }, + { + "epoch": 0.04557510297231912, + "grad_norm": 1.057477355003357, + "learning_rate": 1e-05, + "loss": 0.2527, + "step": 13300 + }, + { + "epoch": 0.04560936996703515, + "grad_norm": 1.062674641609192, + "learning_rate": 1e-05, + "loss": 0.2423, + "step": 13310 + }, + { + "epoch": 0.04564363696175118, + "grad_norm": 1.131895661354065, + "learning_rate": 1e-05, + "loss": 0.2517, + "step": 13320 + }, + { + "epoch": 0.04567790395646721, + "grad_norm": 1.016940951347351, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 13330 + }, + { + "epoch": 0.04571217095118324, + "grad_norm": 1.273378849029541, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 13340 + }, + { + "epoch": 0.04574643794589927, + "grad_norm": 1.0757806301116943, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 13350 + }, + { + "epoch": 0.0457807049406153, + "grad_norm": 1.3264166116714478, + "learning_rate": 1e-05, + "loss": 0.2545, + "step": 13360 + }, + { + "epoch": 0.04581497193533133, + "grad_norm": 1.1011106967926025, + "learning_rate": 1e-05, + "loss": 0.269, + "step": 13370 + }, + { + "epoch": 0.04584923893004736, + "grad_norm": 1.0483593940734863, + "learning_rate": 1e-05, + "loss": 0.2443, + "step": 13380 + }, + { + "epoch": 0.04588350592476339, + "grad_norm": 1.2940049171447754, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 13390 + }, + { + "epoch": 0.04591777291947942, + "grad_norm": 1.1630951166152954, + "learning_rate": 1e-05, + "loss": 0.2702, + "step": 13400 + }, + { + "epoch": 0.04595203991419545, + "grad_norm": 1.0715082883834839, + "learning_rate": 1e-05, + "loss": 0.2402, + "step": 13410 + }, + { + "epoch": 0.04598630690891147, + "grad_norm": 1.0946441888809204, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 13420 + }, + { + "epoch": 0.0460205739036275, + "grad_norm": 1.0796674489974976, + "learning_rate": 1e-05, + "loss": 0.2449, + "step": 13430 + }, + { + "epoch": 0.04605484089834353, + "grad_norm": 1.0534013509750366, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 13440 + }, + { + "epoch": 0.04608910789305956, + "grad_norm": 1.0427377223968506, + "learning_rate": 1e-05, + "loss": 0.2557, + "step": 13450 + }, + { + "epoch": 0.04612337488777559, + "grad_norm": 1.1708178520202637, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 13460 + }, + { + "epoch": 0.04615764188249162, + "grad_norm": 1.0531684160232544, + "learning_rate": 1e-05, + "loss": 0.2594, + "step": 13470 + }, + { + "epoch": 0.04619190887720765, + "grad_norm": 1.1972299814224243, + "learning_rate": 1e-05, + "loss": 0.2566, + "step": 13480 + }, + { + "epoch": 0.04622617587192368, + "grad_norm": 1.0194915533065796, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 13490 + }, + { + "epoch": 0.04626044286663971, + "grad_norm": 1.2437708377838135, + "learning_rate": 1e-05, + "loss": 0.251, + "step": 13500 + }, + { + "epoch": 0.04626044286663971, + "eval_cer": 12.744752160874933, + "eval_loss": 0.24938170611858368, + "eval_normalized_cer": 8.952837729816148, + "eval_runtime": 229.544, + "eval_samples_per_second": 2.231, + "eval_steps_per_second": 0.035, + "step": 13500 + }, + { + "epoch": 0.04629470986135574, + "grad_norm": 1.2545411586761475, + "learning_rate": 1e-05, + "loss": 0.2714, + "step": 13510 + }, + { + "epoch": 0.04632897685607177, + "grad_norm": 1.0949839353561401, + "learning_rate": 1e-05, + "loss": 0.2442, + "step": 13520 + }, + { + "epoch": 0.0463632438507878, + "grad_norm": 1.1343241930007935, + "learning_rate": 1e-05, + "loss": 0.2454, + "step": 13530 + }, + { + "epoch": 0.04639751084550383, + "grad_norm": 1.2311172485351562, + "learning_rate": 1e-05, + "loss": 0.2608, + "step": 13540 + }, + { + "epoch": 0.04643177784021986, + "grad_norm": 1.0095285177230835, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 13550 + }, + { + "epoch": 0.04646604483493589, + "grad_norm": 1.0752111673355103, + "learning_rate": 1e-05, + "loss": 0.237, + "step": 13560 + }, + { + "epoch": 0.04650031182965192, + "grad_norm": 1.0850863456726074, + "learning_rate": 1e-05, + "loss": 0.2552, + "step": 13570 + }, + { + "epoch": 0.04653457882436794, + "grad_norm": 1.185105323791504, + "learning_rate": 1e-05, + "loss": 0.2567, + "step": 13580 + }, + { + "epoch": 0.04656884581908397, + "grad_norm": 1.221077799797058, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 13590 + }, + { + "epoch": 0.0466031128138, + "grad_norm": 1.23322594165802, + "learning_rate": 1e-05, + "loss": 0.2485, + "step": 13600 + }, + { + "epoch": 0.04663737980851603, + "grad_norm": 1.1576459407806396, + "learning_rate": 1e-05, + "loss": 0.2573, + "step": 13610 + }, + { + "epoch": 0.04667164680323206, + "grad_norm": 1.202359914779663, + "learning_rate": 1e-05, + "loss": 0.2555, + "step": 13620 + }, + { + "epoch": 0.04670591379794809, + "grad_norm": 1.1896847486495972, + "learning_rate": 1e-05, + "loss": 0.2586, + "step": 13630 + }, + { + "epoch": 0.04674018079266412, + "grad_norm": 0.8355448246002197, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 13640 + }, + { + "epoch": 0.04677444778738015, + "grad_norm": 1.1680315732955933, + "learning_rate": 1e-05, + "loss": 0.2365, + "step": 13650 + }, + { + "epoch": 0.04680871478209618, + "grad_norm": 1.2542601823806763, + "learning_rate": 1e-05, + "loss": 0.2494, + "step": 13660 + }, + { + "epoch": 0.04684298177681221, + "grad_norm": 1.1530771255493164, + "learning_rate": 1e-05, + "loss": 0.2268, + "step": 13670 + }, + { + "epoch": 0.04687724877152824, + "grad_norm": 1.0363566875457764, + "learning_rate": 1e-05, + "loss": 0.2623, + "step": 13680 + }, + { + "epoch": 0.04691151576624427, + "grad_norm": 1.0039604902267456, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 13690 + }, + { + "epoch": 0.0469457827609603, + "grad_norm": 1.2030223608016968, + "learning_rate": 1e-05, + "loss": 0.2346, + "step": 13700 + }, + { + "epoch": 0.04698004975567633, + "grad_norm": 1.1221191883087158, + "learning_rate": 1e-05, + "loss": 0.2483, + "step": 13710 + }, + { + "epoch": 0.04701431675039236, + "grad_norm": 0.9777095913887024, + "learning_rate": 1e-05, + "loss": 0.2484, + "step": 13720 + }, + { + "epoch": 0.04704858374510839, + "grad_norm": 1.1439709663391113, + "learning_rate": 1e-05, + "loss": 0.2447, + "step": 13730 + }, + { + "epoch": 0.04708285073982441, + "grad_norm": 1.0818963050842285, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 13740 + }, + { + "epoch": 0.04711711773454044, + "grad_norm": 1.1691282987594604, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 13750 + }, + { + "epoch": 0.04715138472925647, + "grad_norm": 1.1172744035720825, + "learning_rate": 1e-05, + "loss": 0.2297, + "step": 13760 + }, + { + "epoch": 0.0471856517239725, + "grad_norm": 1.170318841934204, + "learning_rate": 1e-05, + "loss": 0.2451, + "step": 13770 + }, + { + "epoch": 0.04721991871868853, + "grad_norm": 1.152566909790039, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 13780 + }, + { + "epoch": 0.04725418571340456, + "grad_norm": 1.023813247680664, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 13790 + }, + { + "epoch": 0.04728845270812059, + "grad_norm": 1.159853458404541, + "learning_rate": 1e-05, + "loss": 0.2537, + "step": 13800 + }, + { + "epoch": 0.04732271970283662, + "grad_norm": 1.0579999685287476, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 13810 + }, + { + "epoch": 0.04735698669755265, + "grad_norm": 1.1741176843643188, + "learning_rate": 1e-05, + "loss": 0.2574, + "step": 13820 + }, + { + "epoch": 0.04739125369226868, + "grad_norm": 1.1931475400924683, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 13830 + }, + { + "epoch": 0.04742552068698471, + "grad_norm": 0.9977951645851135, + "learning_rate": 1e-05, + "loss": 0.2515, + "step": 13840 + }, + { + "epoch": 0.04745978768170074, + "grad_norm": 1.177424669265747, + "learning_rate": 1e-05, + "loss": 0.2399, + "step": 13850 + }, + { + "epoch": 0.04749405467641677, + "grad_norm": 1.0663032531738281, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 13860 + }, + { + "epoch": 0.0475283216711328, + "grad_norm": 0.9985357522964478, + "learning_rate": 1e-05, + "loss": 0.2518, + "step": 13870 + }, + { + "epoch": 0.04756258866584883, + "grad_norm": 1.1378836631774902, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 13880 + }, + { + "epoch": 0.04759685566056486, + "grad_norm": 1.130223035812378, + "learning_rate": 1e-05, + "loss": 0.2521, + "step": 13890 + }, + { + "epoch": 0.04763112265528089, + "grad_norm": 1.1601965427398682, + "learning_rate": 1e-05, + "loss": 0.2635, + "step": 13900 + }, + { + "epoch": 0.04766538964999691, + "grad_norm": 1.0559511184692383, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 13910 + }, + { + "epoch": 0.04769965664471294, + "grad_norm": 1.0258302688598633, + "learning_rate": 1e-05, + "loss": 0.2472, + "step": 13920 + }, + { + "epoch": 0.04773392363942897, + "grad_norm": 1.1403284072875977, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 13930 + }, + { + "epoch": 0.047768190634145, + "grad_norm": 1.1353199481964111, + "learning_rate": 1e-05, + "loss": 0.2502, + "step": 13940 + }, + { + "epoch": 0.04780245762886103, + "grad_norm": 1.2859739065170288, + "learning_rate": 1e-05, + "loss": 0.2314, + "step": 13950 + }, + { + "epoch": 0.04783672462357706, + "grad_norm": 1.1392145156860352, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 13960 + }, + { + "epoch": 0.04787099161829309, + "grad_norm": 1.055909276008606, + "learning_rate": 1e-05, + "loss": 0.2406, + "step": 13970 + }, + { + "epoch": 0.04790525861300912, + "grad_norm": 1.0544830560684204, + "learning_rate": 1e-05, + "loss": 0.248, + "step": 13980 + }, + { + "epoch": 0.04793952560772515, + "grad_norm": 1.0616220235824585, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 13990 + }, + { + "epoch": 0.04797379260244118, + "grad_norm": 1.0163713693618774, + "learning_rate": 1e-05, + "loss": 0.2359, + "step": 14000 + }, + { + "epoch": 0.04797379260244118, + "eval_cer": 12.683012877050626, + "eval_loss": 0.24888557195663452, + "eval_normalized_cer": 9.082733812949641, + "eval_runtime": 228.6884, + "eval_samples_per_second": 2.239, + "eval_steps_per_second": 0.035, + "step": 14000 + }, + { + "epoch": 0.04800805959715721, + "grad_norm": 1.0705435276031494, + "learning_rate": 1e-05, + "loss": 0.2407, + "step": 14010 + }, + { + "epoch": 0.04804232659187324, + "grad_norm": 0.9681057333946228, + "learning_rate": 1e-05, + "loss": 0.2506, + "step": 14020 + }, + { + "epoch": 0.04807659358658927, + "grad_norm": 1.0451055765151978, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 14030 + }, + { + "epoch": 0.0481108605813053, + "grad_norm": 1.0783635377883911, + "learning_rate": 1e-05, + "loss": 0.242, + "step": 14040 + }, + { + "epoch": 0.04814512757602133, + "grad_norm": 1.1066149473190308, + "learning_rate": 1e-05, + "loss": 0.2514, + "step": 14050 + }, + { + "epoch": 0.04817939457073736, + "grad_norm": 1.163319706916809, + "learning_rate": 1e-05, + "loss": 0.2336, + "step": 14060 + }, + { + "epoch": 0.04821366156545338, + "grad_norm": 1.1571089029312134, + "learning_rate": 1e-05, + "loss": 0.2626, + "step": 14070 + }, + { + "epoch": 0.04824792856016941, + "grad_norm": 1.153372049331665, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 14080 + }, + { + "epoch": 0.04828219555488544, + "grad_norm": 1.1359412670135498, + "learning_rate": 1e-05, + "loss": 0.245, + "step": 14090 + }, + { + "epoch": 0.04831646254960147, + "grad_norm": 1.1467552185058594, + "learning_rate": 1e-05, + "loss": 0.2185, + "step": 14100 + }, + { + "epoch": 0.0483507295443175, + "grad_norm": 1.1838459968566895, + "learning_rate": 1e-05, + "loss": 0.2236, + "step": 14110 + }, + { + "epoch": 0.04838499653903353, + "grad_norm": 1.1658817529678345, + "learning_rate": 1e-05, + "loss": 0.2527, + "step": 14120 + }, + { + "epoch": 0.04841926353374956, + "grad_norm": 1.0678468942642212, + "learning_rate": 1e-05, + "loss": 0.2299, + "step": 14130 + }, + { + "epoch": 0.04845353052846559, + "grad_norm": 1.169755220413208, + "learning_rate": 1e-05, + "loss": 0.256, + "step": 14140 + }, + { + "epoch": 0.04848779752318162, + "grad_norm": 1.1786571741104126, + "learning_rate": 1e-05, + "loss": 0.2294, + "step": 14150 + }, + { + "epoch": 0.04852206451789765, + "grad_norm": 1.0959957838058472, + "learning_rate": 1e-05, + "loss": 0.2415, + "step": 14160 + }, + { + "epoch": 0.04855633151261368, + "grad_norm": 1.3755067586898804, + "learning_rate": 1e-05, + "loss": 0.2402, + "step": 14170 + }, + { + "epoch": 0.04859059850732971, + "grad_norm": 1.0811392068862915, + "learning_rate": 1e-05, + "loss": 0.2565, + "step": 14180 + }, + { + "epoch": 0.04862486550204574, + "grad_norm": 1.0909180641174316, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 14190 + }, + { + "epoch": 0.04865913249676177, + "grad_norm": 1.168319821357727, + "learning_rate": 1e-05, + "loss": 0.2422, + "step": 14200 + }, + { + "epoch": 0.0486933994914778, + "grad_norm": 1.0762922763824463, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 14210 + }, + { + "epoch": 0.04872766648619383, + "grad_norm": 0.9447901248931885, + "learning_rate": 1e-05, + "loss": 0.2257, + "step": 14220 + }, + { + "epoch": 0.048761933480909854, + "grad_norm": 1.0396913290023804, + "learning_rate": 1e-05, + "loss": 0.2351, + "step": 14230 + }, + { + "epoch": 0.048796200475625884, + "grad_norm": 1.0487229824066162, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 14240 + }, + { + "epoch": 0.048830467470341914, + "grad_norm": 1.1879597902297974, + "learning_rate": 1e-05, + "loss": 0.251, + "step": 14250 + }, + { + "epoch": 0.048864734465057943, + "grad_norm": 1.2501291036605835, + "learning_rate": 1e-05, + "loss": 0.2421, + "step": 14260 + }, + { + "epoch": 0.04889900145977397, + "grad_norm": 1.119624137878418, + "learning_rate": 1e-05, + "loss": 0.242, + "step": 14270 + }, + { + "epoch": 0.04893326845449, + "grad_norm": 1.0920354127883911, + "learning_rate": 1e-05, + "loss": 0.2352, + "step": 14280 + }, + { + "epoch": 0.04896753544920603, + "grad_norm": 1.0519214868545532, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 14290 + }, + { + "epoch": 0.04900180244392206, + "grad_norm": 1.1111136674880981, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 14300 + }, + { + "epoch": 0.04903606943863809, + "grad_norm": 1.2424713373184204, + "learning_rate": 1e-05, + "loss": 0.2358, + "step": 14310 + }, + { + "epoch": 0.04907033643335412, + "grad_norm": 1.0222002267837524, + "learning_rate": 1e-05, + "loss": 0.2302, + "step": 14320 + }, + { + "epoch": 0.04910460342807015, + "grad_norm": 0.9916470050811768, + "learning_rate": 1e-05, + "loss": 0.2068, + "step": 14330 + }, + { + "epoch": 0.04913887042278618, + "grad_norm": 1.31315016746521, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 14340 + }, + { + "epoch": 0.04917313741750221, + "grad_norm": 1.1765952110290527, + "learning_rate": 1e-05, + "loss": 0.2504, + "step": 14350 + }, + { + "epoch": 0.04920740441221824, + "grad_norm": 1.1690514087677002, + "learning_rate": 1e-05, + "loss": 0.2441, + "step": 14360 + }, + { + "epoch": 0.04924167140693427, + "grad_norm": 1.1756752729415894, + "learning_rate": 1e-05, + "loss": 0.2669, + "step": 14370 + }, + { + "epoch": 0.0492759384016503, + "grad_norm": 1.1072384119033813, + "learning_rate": 1e-05, + "loss": 0.2426, + "step": 14380 + }, + { + "epoch": 0.049310205396366324, + "grad_norm": 1.2008529901504517, + "learning_rate": 1e-05, + "loss": 0.2545, + "step": 14390 + }, + { + "epoch": 0.049344472391082354, + "grad_norm": 1.1737167835235596, + "learning_rate": 1e-05, + "loss": 0.25, + "step": 14400 + }, + { + "epoch": 0.049378739385798384, + "grad_norm": 1.0450342893600464, + "learning_rate": 1e-05, + "loss": 0.2539, + "step": 14410 + }, + { + "epoch": 0.049413006380514414, + "grad_norm": 1.0435712337493896, + "learning_rate": 1e-05, + "loss": 0.2404, + "step": 14420 + }, + { + "epoch": 0.049447273375230444, + "grad_norm": 1.2220741510391235, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 14430 + }, + { + "epoch": 0.049481540369946474, + "grad_norm": 1.285495400428772, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 14440 + }, + { + "epoch": 0.049515807364662504, + "grad_norm": 1.2037091255187988, + "learning_rate": 1e-05, + "loss": 0.269, + "step": 14450 + }, + { + "epoch": 0.04955007435937853, + "grad_norm": 1.1641725301742554, + "learning_rate": 1e-05, + "loss": 0.2582, + "step": 14460 + }, + { + "epoch": 0.04958434135409456, + "grad_norm": 1.30322265625, + "learning_rate": 1e-05, + "loss": 0.2637, + "step": 14470 + }, + { + "epoch": 0.04961860834881059, + "grad_norm": 1.0870246887207031, + "learning_rate": 1e-05, + "loss": 0.2319, + "step": 14480 + }, + { + "epoch": 0.04965287534352662, + "grad_norm": 1.1934154033660889, + "learning_rate": 1e-05, + "loss": 0.2654, + "step": 14490 + }, + { + "epoch": 0.04968714233824265, + "grad_norm": 1.0471813678741455, + "learning_rate": 1e-05, + "loss": 0.2672, + "step": 14500 + }, + { + "epoch": 0.04968714233824265, + "eval_cer": 12.859410830834362, + "eval_loss": 0.2491358071565628, + "eval_normalized_cer": 9.292565947242206, + "eval_runtime": 235.6794, + "eval_samples_per_second": 2.172, + "eval_steps_per_second": 0.034, + "step": 14500 + }, + { + "epoch": 0.04972140933295868, + "grad_norm": 1.1028441190719604, + "learning_rate": 1e-05, + "loss": 0.2506, + "step": 14510 + }, + { + "epoch": 0.04975567632767471, + "grad_norm": 1.0829942226409912, + "learning_rate": 1e-05, + "loss": 0.2624, + "step": 14520 + }, + { + "epoch": 0.04978994332239074, + "grad_norm": 1.094115138053894, + "learning_rate": 1e-05, + "loss": 0.2393, + "step": 14530 + }, + { + "epoch": 0.04982421031710677, + "grad_norm": 1.1440691947937012, + "learning_rate": 1e-05, + "loss": 0.2454, + "step": 14540 + }, + { + "epoch": 0.0498584773118228, + "grad_norm": 1.171846866607666, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 14550 + }, + { + "epoch": 0.049892744306538825, + "grad_norm": 1.1416808366775513, + "learning_rate": 1e-05, + "loss": 0.2545, + "step": 14560 + }, + { + "epoch": 0.049927011301254855, + "grad_norm": 1.2240614891052246, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 14570 + }, + { + "epoch": 0.049961278295970885, + "grad_norm": 1.224147081375122, + "learning_rate": 1e-05, + "loss": 0.2662, + "step": 14580 + }, + { + "epoch": 0.049995545290686914, + "grad_norm": 1.0746549367904663, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 14590 + }, + { + "epoch": 0.050029812285402944, + "grad_norm": 1.0464617013931274, + "learning_rate": 1e-05, + "loss": 0.2483, + "step": 14600 + }, + { + "epoch": 0.050064079280118974, + "grad_norm": 1.0312144756317139, + "learning_rate": 1e-05, + "loss": 0.2496, + "step": 14610 + }, + { + "epoch": 0.050098346274835004, + "grad_norm": 1.109796166419983, + "learning_rate": 1e-05, + "loss": 0.2459, + "step": 14620 + }, + { + "epoch": 0.050132613269551034, + "grad_norm": 1.100741982460022, + "learning_rate": 1e-05, + "loss": 0.2442, + "step": 14630 + }, + { + "epoch": 0.050166880264267064, + "grad_norm": 1.187683343887329, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 14640 + }, + { + "epoch": 0.05020114725898309, + "grad_norm": 1.1926337480545044, + "learning_rate": 1e-05, + "loss": 0.2554, + "step": 14650 + }, + { + "epoch": 0.05023541425369912, + "grad_norm": 0.9210227727890015, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 14660 + }, + { + "epoch": 0.05026968124841515, + "grad_norm": 1.010360836982727, + "learning_rate": 1e-05, + "loss": 0.2505, + "step": 14670 + }, + { + "epoch": 0.05030394824313118, + "grad_norm": 1.0252940654754639, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 14680 + }, + { + "epoch": 0.05033821523784721, + "grad_norm": 1.297200322151184, + "learning_rate": 1e-05, + "loss": 0.2526, + "step": 14690 + }, + { + "epoch": 0.05037248223256324, + "grad_norm": 1.2773388624191284, + "learning_rate": 1e-05, + "loss": 0.2484, + "step": 14700 + }, + { + "epoch": 0.05040674922727927, + "grad_norm": 1.110484004020691, + "learning_rate": 1e-05, + "loss": 0.2542, + "step": 14710 + }, + { + "epoch": 0.050441016221995295, + "grad_norm": 1.0664464235305786, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 14720 + }, + { + "epoch": 0.050475283216711325, + "grad_norm": 1.1421204805374146, + "learning_rate": 1e-05, + "loss": 0.2464, + "step": 14730 + }, + { + "epoch": 0.050509550211427355, + "grad_norm": 1.115225911140442, + "learning_rate": 1e-05, + "loss": 0.2584, + "step": 14740 + }, + { + "epoch": 0.050543817206143385, + "grad_norm": 1.1027865409851074, + "learning_rate": 1e-05, + "loss": 0.2606, + "step": 14750 + }, + { + "epoch": 0.050578084200859415, + "grad_norm": 1.1306570768356323, + "learning_rate": 1e-05, + "loss": 0.2561, + "step": 14760 + }, + { + "epoch": 0.050612351195575445, + "grad_norm": 1.0982325077056885, + "learning_rate": 1e-05, + "loss": 0.2688, + "step": 14770 + }, + { + "epoch": 0.050646618190291474, + "grad_norm": 1.004801630973816, + "learning_rate": 1e-05, + "loss": 0.2596, + "step": 14780 + }, + { + "epoch": 0.050680885185007504, + "grad_norm": 1.2542450428009033, + "learning_rate": 1e-05, + "loss": 0.2521, + "step": 14790 + }, + { + "epoch": 0.050715152179723534, + "grad_norm": 1.311880111694336, + "learning_rate": 1e-05, + "loss": 0.2426, + "step": 14800 + }, + { + "epoch": 0.050749419174439564, + "grad_norm": 1.119271159172058, + "learning_rate": 1e-05, + "loss": 0.2345, + "step": 14810 + }, + { + "epoch": 0.050783686169155594, + "grad_norm": 1.1003872156143188, + "learning_rate": 1e-05, + "loss": 0.2518, + "step": 14820 + }, + { + "epoch": 0.050817953163871624, + "grad_norm": 1.17613685131073, + "learning_rate": 1e-05, + "loss": 0.2546, + "step": 14830 + }, + { + "epoch": 0.050852220158587653, + "grad_norm": 1.188706398010254, + "learning_rate": 1e-05, + "loss": 0.247, + "step": 14840 + }, + { + "epoch": 0.05088648715330368, + "grad_norm": 1.1993244886398315, + "learning_rate": 1e-05, + "loss": 0.2619, + "step": 14850 + }, + { + "epoch": 0.05092075414801971, + "grad_norm": 1.0679277181625366, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 14860 + }, + { + "epoch": 0.05095502114273574, + "grad_norm": 1.1368016004562378, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 14870 + }, + { + "epoch": 0.050989288137451766, + "grad_norm": 1.0620750188827515, + "learning_rate": 1e-05, + "loss": 0.2365, + "step": 14880 + }, + { + "epoch": 0.051023555132167796, + "grad_norm": 1.1344637870788574, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 14890 + }, + { + "epoch": 0.051057822126883826, + "grad_norm": 1.1324440240859985, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 14900 + }, + { + "epoch": 0.051092089121599855, + "grad_norm": 1.139269232749939, + "learning_rate": 1e-05, + "loss": 0.2467, + "step": 14910 + }, + { + "epoch": 0.051126356116315885, + "grad_norm": 1.1169798374176025, + "learning_rate": 1e-05, + "loss": 0.2616, + "step": 14920 + }, + { + "epoch": 0.051160623111031915, + "grad_norm": 1.057564377784729, + "learning_rate": 1e-05, + "loss": 0.2554, + "step": 14930 + }, + { + "epoch": 0.051194890105747945, + "grad_norm": 1.084874153137207, + "learning_rate": 1e-05, + "loss": 0.2399, + "step": 14940 + }, + { + "epoch": 0.051229157100463975, + "grad_norm": 1.1470558643341064, + "learning_rate": 1e-05, + "loss": 0.252, + "step": 14950 + }, + { + "epoch": 0.051263424095180005, + "grad_norm": 1.0080534219741821, + "learning_rate": 1e-05, + "loss": 0.256, + "step": 14960 + }, + { + "epoch": 0.051297691089896034, + "grad_norm": 1.071164608001709, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 14970 + }, + { + "epoch": 0.051331958084612064, + "grad_norm": 1.1828765869140625, + "learning_rate": 1e-05, + "loss": 0.2346, + "step": 14980 + }, + { + "epoch": 0.051366225079328094, + "grad_norm": 1.2067548036575317, + "learning_rate": 1e-05, + "loss": 0.2414, + "step": 14990 + }, + { + "epoch": 0.051400492074044124, + "grad_norm": 1.0730314254760742, + "learning_rate": 1e-05, + "loss": 0.2563, + "step": 15000 + }, + { + "epoch": 0.051400492074044124, + "eval_cer": 12.568354207091199, + "eval_loss": 0.2477913200855255, + "eval_normalized_cer": 8.832933653077538, + "eval_runtime": 229.5255, + "eval_samples_per_second": 2.231, + "eval_steps_per_second": 0.035, + "step": 15000 + }, + { + "epoch": 0.051434759068760154, + "grad_norm": 1.0130051374435425, + "learning_rate": 1e-05, + "loss": 0.2571, + "step": 15010 + }, + { + "epoch": 0.051469026063476184, + "grad_norm": 1.1843127012252808, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 15020 + }, + { + "epoch": 0.051503293058192214, + "grad_norm": 1.1182798147201538, + "learning_rate": 1e-05, + "loss": 0.2559, + "step": 15030 + }, + { + "epoch": 0.051537560052908236, + "grad_norm": 1.1553055047988892, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 15040 + }, + { + "epoch": 0.051571827047624266, + "grad_norm": 1.030463695526123, + "learning_rate": 1e-05, + "loss": 0.2368, + "step": 15050 + }, + { + "epoch": 0.051606094042340296, + "grad_norm": 1.2701278924942017, + "learning_rate": 1e-05, + "loss": 0.2495, + "step": 15060 + }, + { + "epoch": 0.051640361037056326, + "grad_norm": 1.1329874992370605, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 15070 + }, + { + "epoch": 0.051674628031772356, + "grad_norm": 1.132430911064148, + "learning_rate": 1e-05, + "loss": 0.2479, + "step": 15080 + }, + { + "epoch": 0.051708895026488386, + "grad_norm": 1.1582975387573242, + "learning_rate": 1e-05, + "loss": 0.2511, + "step": 15090 + }, + { + "epoch": 0.051743162021204415, + "grad_norm": 1.0918657779693604, + "learning_rate": 1e-05, + "loss": 0.2335, + "step": 15100 + }, + { + "epoch": 0.051777429015920445, + "grad_norm": 1.1993087530136108, + "learning_rate": 1e-05, + "loss": 0.2468, + "step": 15110 + }, + { + "epoch": 0.051811696010636475, + "grad_norm": 1.1809076070785522, + "learning_rate": 1e-05, + "loss": 0.2652, + "step": 15120 + }, + { + "epoch": 0.051845963005352505, + "grad_norm": 1.2104005813598633, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 15130 + }, + { + "epoch": 0.051880230000068535, + "grad_norm": 1.0615415573120117, + "learning_rate": 1e-05, + "loss": 0.2558, + "step": 15140 + }, + { + "epoch": 0.051914496994784565, + "grad_norm": 1.1397675275802612, + "learning_rate": 1e-05, + "loss": 0.2543, + "step": 15150 + }, + { + "epoch": 0.051948763989500595, + "grad_norm": 1.1353163719177246, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 15160 + }, + { + "epoch": 0.051983030984216624, + "grad_norm": 1.1440542936325073, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 15170 + }, + { + "epoch": 0.052017297978932654, + "grad_norm": 1.2371265888214111, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 15180 + }, + { + "epoch": 0.052051564973648684, + "grad_norm": 1.03389310836792, + "learning_rate": 1e-05, + "loss": 0.2312, + "step": 15190 + }, + { + "epoch": 0.052085831968364714, + "grad_norm": 1.3557147979736328, + "learning_rate": 1e-05, + "loss": 0.2387, + "step": 15200 + }, + { + "epoch": 0.05212009896308074, + "grad_norm": 1.1793631315231323, + "learning_rate": 1e-05, + "loss": 0.2528, + "step": 15210 + }, + { + "epoch": 0.05215436595779677, + "grad_norm": 1.1020557880401611, + "learning_rate": 1e-05, + "loss": 0.2438, + "step": 15220 + }, + { + "epoch": 0.052188632952512796, + "grad_norm": 1.0566291809082031, + "learning_rate": 1e-05, + "loss": 0.2493, + "step": 15230 + }, + { + "epoch": 0.052222899947228826, + "grad_norm": 1.0652698278427124, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 15240 + }, + { + "epoch": 0.052257166941944856, + "grad_norm": 1.337236762046814, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 15250 + }, + { + "epoch": 0.052291433936660886, + "grad_norm": 1.1121892929077148, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 15260 + }, + { + "epoch": 0.052325700931376916, + "grad_norm": 1.1201363801956177, + "learning_rate": 1e-05, + "loss": 0.2477, + "step": 15270 + }, + { + "epoch": 0.052359967926092946, + "grad_norm": 1.1055474281311035, + "learning_rate": 1e-05, + "loss": 0.243, + "step": 15280 + }, + { + "epoch": 0.052394234920808976, + "grad_norm": 1.1349745988845825, + "learning_rate": 1e-05, + "loss": 0.2441, + "step": 15290 + }, + { + "epoch": 0.052428501915525005, + "grad_norm": 1.0891187191009521, + "learning_rate": 1e-05, + "loss": 0.2544, + "step": 15300 + }, + { + "epoch": 0.052462768910241035, + "grad_norm": 1.0537917613983154, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 15310 + }, + { + "epoch": 0.052497035904957065, + "grad_norm": 1.1704713106155396, + "learning_rate": 1e-05, + "loss": 0.267, + "step": 15320 + }, + { + "epoch": 0.052531302899673095, + "grad_norm": 0.9952285289764404, + "learning_rate": 1e-05, + "loss": 0.2291, + "step": 15330 + }, + { + "epoch": 0.052565569894389125, + "grad_norm": 0.9887141585350037, + "learning_rate": 1e-05, + "loss": 0.2507, + "step": 15340 + }, + { + "epoch": 0.052599836889105155, + "grad_norm": 1.20647394657135, + "learning_rate": 1e-05, + "loss": 0.2512, + "step": 15350 + }, + { + "epoch": 0.052634103883821184, + "grad_norm": 1.0504127740859985, + "learning_rate": 1e-05, + "loss": 0.2559, + "step": 15360 + }, + { + "epoch": 0.05266837087853721, + "grad_norm": 1.010195255279541, + "learning_rate": 1e-05, + "loss": 0.2292, + "step": 15370 + }, + { + "epoch": 0.05270263787325324, + "grad_norm": 1.0885406732559204, + "learning_rate": 1e-05, + "loss": 0.2244, + "step": 15380 + }, + { + "epoch": 0.05273690486796927, + "grad_norm": 0.9946883916854858, + "learning_rate": 1e-05, + "loss": 0.2488, + "step": 15390 + }, + { + "epoch": 0.0527711718626853, + "grad_norm": 1.163482666015625, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 15400 + }, + { + "epoch": 0.05280543885740133, + "grad_norm": 1.0451022386550903, + "learning_rate": 1e-05, + "loss": 0.2324, + "step": 15410 + }, + { + "epoch": 0.05283970585211736, + "grad_norm": 1.0925875902175903, + "learning_rate": 1e-05, + "loss": 0.241, + "step": 15420 + }, + { + "epoch": 0.052873972846833386, + "grad_norm": 0.9957507252693176, + "learning_rate": 1e-05, + "loss": 0.2377, + "step": 15430 + }, + { + "epoch": 0.052908239841549416, + "grad_norm": 1.1372512578964233, + "learning_rate": 1e-05, + "loss": 0.2565, + "step": 15440 + }, + { + "epoch": 0.052942506836265446, + "grad_norm": 1.2347620725631714, + "learning_rate": 1e-05, + "loss": 0.2392, + "step": 15450 + }, + { + "epoch": 0.052976773830981476, + "grad_norm": 1.0665175914764404, + "learning_rate": 1e-05, + "loss": 0.2433, + "step": 15460 + }, + { + "epoch": 0.053011040825697506, + "grad_norm": 1.2257585525512695, + "learning_rate": 1e-05, + "loss": 0.2371, + "step": 15470 + }, + { + "epoch": 0.053045307820413536, + "grad_norm": 1.1391757726669312, + "learning_rate": 1e-05, + "loss": 0.2368, + "step": 15480 + }, + { + "epoch": 0.053079574815129565, + "grad_norm": 1.0963542461395264, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 15490 + }, + { + "epoch": 0.053113841809845595, + "grad_norm": 1.0512635707855225, + "learning_rate": 1e-05, + "loss": 0.2445, + "step": 15500 + }, + { + "epoch": 0.053113841809845595, + "eval_cer": 12.947609807726229, + "eval_loss": 0.2476710081100464, + "eval_normalized_cer": 9.192645883293366, + "eval_runtime": 227.5054, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 0.035, + "step": 15500 + }, + { + "epoch": 0.053148108804561625, + "grad_norm": 1.1827884912490845, + "learning_rate": 1e-05, + "loss": 0.2591, + "step": 15510 + }, + { + "epoch": 0.053182375799277655, + "grad_norm": 1.1053601503372192, + "learning_rate": 1e-05, + "loss": 0.2313, + "step": 15520 + }, + { + "epoch": 0.05321664279399368, + "grad_norm": 0.9013387560844421, + "learning_rate": 1e-05, + "loss": 0.2418, + "step": 15530 + }, + { + "epoch": 0.05325090978870971, + "grad_norm": 1.2677943706512451, + "learning_rate": 1e-05, + "loss": 0.2374, + "step": 15540 + }, + { + "epoch": 0.05328517678342574, + "grad_norm": 1.2304824590682983, + "learning_rate": 1e-05, + "loss": 0.2598, + "step": 15550 + }, + { + "epoch": 0.05331944377814177, + "grad_norm": 1.0899723768234253, + "learning_rate": 1e-05, + "loss": 0.2278, + "step": 15560 + }, + { + "epoch": 0.0533537107728578, + "grad_norm": 1.1334359645843506, + "learning_rate": 1e-05, + "loss": 0.2428, + "step": 15570 + }, + { + "epoch": 0.05338797776757383, + "grad_norm": 0.9581900835037231, + "learning_rate": 1e-05, + "loss": 0.2353, + "step": 15580 + }, + { + "epoch": 0.05342224476228986, + "grad_norm": 1.1402366161346436, + "learning_rate": 1e-05, + "loss": 0.2224, + "step": 15590 + }, + { + "epoch": 0.05345651175700589, + "grad_norm": 1.0247036218643188, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 15600 + }, + { + "epoch": 0.05349077875172192, + "grad_norm": 1.0679134130477905, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 15610 + }, + { + "epoch": 0.053525045746437946, + "grad_norm": 1.0253273248672485, + "learning_rate": 1e-05, + "loss": 0.2338, + "step": 15620 + }, + { + "epoch": 0.053559312741153976, + "grad_norm": 1.066657543182373, + "learning_rate": 1e-05, + "loss": 0.2321, + "step": 15630 + }, + { + "epoch": 0.053593579735870006, + "grad_norm": 1.1637462377548218, + "learning_rate": 1e-05, + "loss": 0.2452, + "step": 15640 + }, + { + "epoch": 0.053627846730586036, + "grad_norm": 1.0810452699661255, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 15650 + }, + { + "epoch": 0.053662113725302066, + "grad_norm": 1.1184097528457642, + "learning_rate": 1e-05, + "loss": 0.2448, + "step": 15660 + }, + { + "epoch": 0.053696380720018096, + "grad_norm": 0.9914514422416687, + "learning_rate": 1e-05, + "loss": 0.2246, + "step": 15670 + }, + { + "epoch": 0.053730647714734125, + "grad_norm": 1.1371618509292603, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 15680 + }, + { + "epoch": 0.05376491470945015, + "grad_norm": 1.0953712463378906, + "learning_rate": 1e-05, + "loss": 0.2414, + "step": 15690 + }, + { + "epoch": 0.05379918170416618, + "grad_norm": 1.05868399143219, + "learning_rate": 1e-05, + "loss": 0.2533, + "step": 15700 + }, + { + "epoch": 0.05383344869888221, + "grad_norm": 1.2226771116256714, + "learning_rate": 1e-05, + "loss": 0.2342, + "step": 15710 + }, + { + "epoch": 0.05386771569359824, + "grad_norm": 1.1739540100097656, + "learning_rate": 1e-05, + "loss": 0.2335, + "step": 15720 + }, + { + "epoch": 0.05390198268831427, + "grad_norm": 1.1677112579345703, + "learning_rate": 1e-05, + "loss": 0.2373, + "step": 15730 + }, + { + "epoch": 0.0539362496830303, + "grad_norm": 1.040004849433899, + "learning_rate": 1e-05, + "loss": 0.2385, + "step": 15740 + }, + { + "epoch": 0.05397051667774633, + "grad_norm": 1.0462521314620972, + "learning_rate": 1e-05, + "loss": 0.2213, + "step": 15750 + }, + { + "epoch": 0.05400478367246236, + "grad_norm": 1.1457821130752563, + "learning_rate": 1e-05, + "loss": 0.234, + "step": 15760 + }, + { + "epoch": 0.05403905066717839, + "grad_norm": 1.1003904342651367, + "learning_rate": 1e-05, + "loss": 0.2378, + "step": 15770 + }, + { + "epoch": 0.05407331766189442, + "grad_norm": 0.9855090975761414, + "learning_rate": 1e-05, + "loss": 0.2456, + "step": 15780 + }, + { + "epoch": 0.05410758465661045, + "grad_norm": 0.9081568717956543, + "learning_rate": 1e-05, + "loss": 0.2283, + "step": 15790 + }, + { + "epoch": 0.05414185165132648, + "grad_norm": 1.0517041683197021, + "learning_rate": 1e-05, + "loss": 0.241, + "step": 15800 + }, + { + "epoch": 0.054176118646042506, + "grad_norm": 1.098231315612793, + "learning_rate": 1e-05, + "loss": 0.2364, + "step": 15810 + }, + { + "epoch": 0.054210385640758536, + "grad_norm": 1.2353124618530273, + "learning_rate": 1e-05, + "loss": 0.223, + "step": 15820 + }, + { + "epoch": 0.054244652635474566, + "grad_norm": 1.2910332679748535, + "learning_rate": 1e-05, + "loss": 0.2391, + "step": 15830 + }, + { + "epoch": 0.054278919630190596, + "grad_norm": 0.9715086221694946, + "learning_rate": 1e-05, + "loss": 0.232, + "step": 15840 + }, + { + "epoch": 0.054313186624906626, + "grad_norm": 0.9929107427597046, + "learning_rate": 1e-05, + "loss": 0.2425, + "step": 15850 + }, + { + "epoch": 0.05434745361962265, + "grad_norm": 1.104021430015564, + "learning_rate": 1e-05, + "loss": 0.2242, + "step": 15860 + }, + { + "epoch": 0.05438172061433868, + "grad_norm": 1.1469321250915527, + "learning_rate": 1e-05, + "loss": 0.2403, + "step": 15870 + }, + { + "epoch": 0.05441598760905471, + "grad_norm": 1.158736228942871, + "learning_rate": 1e-05, + "loss": 0.2543, + "step": 15880 + }, + { + "epoch": 0.05445025460377074, + "grad_norm": 1.034866213798523, + "learning_rate": 1e-05, + "loss": 0.2524, + "step": 15890 + }, + { + "epoch": 0.05448452159848677, + "grad_norm": 1.0600367784500122, + "learning_rate": 1e-05, + "loss": 0.2434, + "step": 15900 + }, + { + "epoch": 0.0545187885932028, + "grad_norm": 1.2206172943115234, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 15910 + }, + { + "epoch": 0.05455305558791883, + "grad_norm": 1.3504046201705933, + "learning_rate": 1e-05, + "loss": 0.2663, + "step": 15920 + }, + { + "epoch": 0.05458732258263486, + "grad_norm": 0.9881328344345093, + "learning_rate": 1e-05, + "loss": 0.2507, + "step": 15930 + }, + { + "epoch": 0.05462158957735089, + "grad_norm": 1.104619026184082, + "learning_rate": 1e-05, + "loss": 0.2552, + "step": 15940 + }, + { + "epoch": 0.05465585657206692, + "grad_norm": 0.9734206795692444, + "learning_rate": 1e-05, + "loss": 0.2552, + "step": 15950 + }, + { + "epoch": 0.05469012356678295, + "grad_norm": 1.0191655158996582, + "learning_rate": 1e-05, + "loss": 0.2564, + "step": 15960 + }, + { + "epoch": 0.05472439056149898, + "grad_norm": 1.0736887454986572, + "learning_rate": 1e-05, + "loss": 0.235, + "step": 15970 + }, + { + "epoch": 0.05475865755621501, + "grad_norm": 0.9910275936126709, + "learning_rate": 1e-05, + "loss": 0.2476, + "step": 15980 + }, + { + "epoch": 0.05479292455093104, + "grad_norm": 1.2393155097961426, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 15990 + }, + { + "epoch": 0.05482719154564707, + "grad_norm": 1.1756526231765747, + "learning_rate": 1e-05, + "loss": 0.2568, + "step": 16000 + }, + { + "epoch": 0.05482719154564707, + "eval_cer": 13.300405715293703, + "eval_loss": 0.2448866218328476, + "eval_normalized_cer": 9.672262190247801, + "eval_runtime": 228.492, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.035, + "step": 16000 + }, + { + "epoch": 0.054861458540363096, + "grad_norm": 1.2805075645446777, + "learning_rate": 1e-05, + "loss": 0.2613, + "step": 16010 + }, + { + "epoch": 0.05489572553507912, + "grad_norm": 1.01006019115448, + "learning_rate": 1e-05, + "loss": 0.2767, + "step": 16020 + }, + { + "epoch": 0.05492999252979515, + "grad_norm": 1.2456789016723633, + "learning_rate": 1e-05, + "loss": 0.2519, + "step": 16030 + }, + { + "epoch": 0.05496425952451118, + "grad_norm": 1.2959610223770142, + "learning_rate": 1e-05, + "loss": 0.2583, + "step": 16040 + }, + { + "epoch": 0.05499852651922721, + "grad_norm": 1.1990777254104614, + "learning_rate": 1e-05, + "loss": 0.2465, + "step": 16050 + }, + { + "epoch": 0.05503279351394324, + "grad_norm": 1.0876649618148804, + "learning_rate": 1e-05, + "loss": 0.2602, + "step": 16060 + }, + { + "epoch": 0.05506706050865927, + "grad_norm": 0.9991684556007385, + "learning_rate": 1e-05, + "loss": 0.2363, + "step": 16070 + }, + { + "epoch": 0.0551013275033753, + "grad_norm": 1.1685731410980225, + "learning_rate": 1e-05, + "loss": 0.2489, + "step": 16080 + }, + { + "epoch": 0.05513559449809133, + "grad_norm": 1.086849331855774, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 16090 + }, + { + "epoch": 0.05516986149280736, + "grad_norm": 1.0985273122787476, + "learning_rate": 1e-05, + "loss": 0.2482, + "step": 16100 + }, + { + "epoch": 0.05520412848752339, + "grad_norm": 1.112008810043335, + "learning_rate": 1e-05, + "loss": 0.244, + "step": 16110 + }, + { + "epoch": 0.05523839548223942, + "grad_norm": 1.068474531173706, + "learning_rate": 1e-05, + "loss": 0.2349, + "step": 16120 + }, + { + "epoch": 0.05527266247695545, + "grad_norm": 1.001871943473816, + "learning_rate": 1e-05, + "loss": 0.242, + "step": 16130 + }, + { + "epoch": 0.05530692947167148, + "grad_norm": 1.1889883279800415, + "learning_rate": 1e-05, + "loss": 0.2293, + "step": 16140 + }, + { + "epoch": 0.05534119646638751, + "grad_norm": 1.1890829801559448, + "learning_rate": 1e-05, + "loss": 0.2399, + "step": 16150 + }, + { + "epoch": 0.05537546346110354, + "grad_norm": 1.0574150085449219, + "learning_rate": 1e-05, + "loss": 0.2379, + "step": 16160 + }, + { + "epoch": 0.05540973045581957, + "grad_norm": 1.1458338499069214, + "learning_rate": 1e-05, + "loss": 0.2501, + "step": 16170 + }, + { + "epoch": 0.05544399745053559, + "grad_norm": 1.0738166570663452, + "learning_rate": 1e-05, + "loss": 0.2292, + "step": 16180 + }, + { + "epoch": 0.05547826444525162, + "grad_norm": 1.1803652048110962, + "learning_rate": 1e-05, + "loss": 0.2286, + "step": 16190 + }, + { + "epoch": 0.05551253143996765, + "grad_norm": 1.02376127243042, + "learning_rate": 1e-05, + "loss": 0.2495, + "step": 16200 + }, + { + "epoch": 0.05554679843468368, + "grad_norm": 0.9988088011741638, + "learning_rate": 1e-05, + "loss": 0.2191, + "step": 16210 + }, + { + "epoch": 0.05558106542939971, + "grad_norm": 1.2596487998962402, + "learning_rate": 1e-05, + "loss": 0.2373, + "step": 16220 + }, + { + "epoch": 0.05561533242411574, + "grad_norm": 1.1729799509048462, + "learning_rate": 1e-05, + "loss": 0.2355, + "step": 16230 + }, + { + "epoch": 0.05564959941883177, + "grad_norm": 1.0790057182312012, + "learning_rate": 1e-05, + "loss": 0.2211, + "step": 16240 + }, + { + "epoch": 0.0556838664135478, + "grad_norm": 1.044366717338562, + "learning_rate": 1e-05, + "loss": 0.226, + "step": 16250 + }, + { + "epoch": 0.05571813340826383, + "grad_norm": 1.1206332445144653, + "learning_rate": 1e-05, + "loss": 0.2121, + "step": 16260 + }, + { + "epoch": 0.05575240040297986, + "grad_norm": 1.0622124671936035, + "learning_rate": 1e-05, + "loss": 0.2179, + "step": 16270 + }, + { + "epoch": 0.05578666739769589, + "grad_norm": 1.1448779106140137, + "learning_rate": 1e-05, + "loss": 0.2266, + "step": 16280 + }, + { + "epoch": 0.05582093439241192, + "grad_norm": 0.9783304929733276, + "learning_rate": 1e-05, + "loss": 0.2254, + "step": 16290 + }, + { + "epoch": 0.05585520138712795, + "grad_norm": 1.2645550966262817, + "learning_rate": 1e-05, + "loss": 0.2235, + "step": 16300 + }, + { + "epoch": 0.05588946838184398, + "grad_norm": 1.1020660400390625, + "learning_rate": 1e-05, + "loss": 0.2363, + "step": 16310 + }, + { + "epoch": 0.05592373537656001, + "grad_norm": 1.1464896202087402, + "learning_rate": 1e-05, + "loss": 0.2407, + "step": 16320 + }, + { + "epoch": 0.05595800237127604, + "grad_norm": 1.2940075397491455, + "learning_rate": 1e-05, + "loss": 0.2315, + "step": 16330 + }, + { + "epoch": 0.05599226936599206, + "grad_norm": 1.0869808197021484, + "learning_rate": 1e-05, + "loss": 0.2176, + "step": 16340 + }, + { + "epoch": 0.05602653636070809, + "grad_norm": 1.025872826576233, + "learning_rate": 1e-05, + "loss": 0.2138, + "step": 16350 + }, + { + "epoch": 0.05606080335542412, + "grad_norm": 1.0538456439971924, + "learning_rate": 1e-05, + "loss": 0.2204, + "step": 16360 + }, + { + "epoch": 0.05609507035014015, + "grad_norm": 1.0765165090560913, + "learning_rate": 1e-05, + "loss": 0.2327, + "step": 16370 + }, + { + "epoch": 0.05612933734485618, + "grad_norm": 1.1909908056259155, + "learning_rate": 1e-05, + "loss": 0.239, + "step": 16380 + }, + { + "epoch": 0.05616360433957221, + "grad_norm": 1.249849557876587, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 16390 + }, + { + "epoch": 0.05619787133428824, + "grad_norm": 1.0325443744659424, + "learning_rate": 1e-05, + "loss": 0.2372, + "step": 16400 + }, + { + "epoch": 0.05623213832900427, + "grad_norm": 2.3129172325134277, + "learning_rate": 1e-05, + "loss": 0.2334, + "step": 16410 + }, + { + "epoch": 0.0562664053237203, + "grad_norm": 1.1196421384811401, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 16420 + }, + { + "epoch": 0.05630067231843633, + "grad_norm": 1.170823335647583, + "learning_rate": 1e-05, + "loss": 0.2435, + "step": 16430 + }, + { + "epoch": 0.05633493931315236, + "grad_norm": 1.2110240459442139, + "learning_rate": 1e-05, + "loss": 0.2499, + "step": 16440 + }, + { + "epoch": 0.05636920630786839, + "grad_norm": 1.2487872838974, + "learning_rate": 1e-05, + "loss": 0.2556, + "step": 16450 + }, + { + "epoch": 0.05640347330258442, + "grad_norm": 1.0886249542236328, + "learning_rate": 1e-05, + "loss": 0.2297, + "step": 16460 + }, + { + "epoch": 0.05643774029730045, + "grad_norm": 1.274640679359436, + "learning_rate": 1e-05, + "loss": 0.236, + "step": 16470 + }, + { + "epoch": 0.05647200729201648, + "grad_norm": 1.1046870946884155, + "learning_rate": 1e-05, + "loss": 0.2497, + "step": 16480 + }, + { + "epoch": 0.05650627428673251, + "grad_norm": 1.0462591648101807, + "learning_rate": 1e-05, + "loss": 0.2298, + "step": 16490 + }, + { + "epoch": 0.05654054128144854, + "grad_norm": 1.1058859825134277, + "learning_rate": 1e-05, + "loss": 0.2367, + "step": 16500 + }, + { + "epoch": 0.05654054128144854, + "eval_cer": 13.247486329158582, + "eval_loss": 0.24819624423980713, + "eval_normalized_cer": 9.602318145483613, + "eval_runtime": 228.0591, + "eval_samples_per_second": 2.245, + "eval_steps_per_second": 0.035, + "step": 16500 + }, + { + "epoch": 0.05657480827616456, + "grad_norm": 1.1613426208496094, + "learning_rate": 1e-05, + "loss": 0.2318, + "step": 16510 + }, + { + "epoch": 0.05660907527088059, + "grad_norm": 1.1319721937179565, + "learning_rate": 1e-05, + "loss": 0.2461, + "step": 16520 + }, + { + "epoch": 0.05664334226559662, + "grad_norm": 1.2690738439559937, + "learning_rate": 1e-05, + "loss": 0.2392, + "step": 16530 + }, + { + "epoch": 0.05667760926031265, + "grad_norm": 1.119575023651123, + "learning_rate": 1e-05, + "loss": 0.2467, + "step": 16540 + }, + { + "epoch": 0.05671187625502868, + "grad_norm": 1.119841456413269, + "learning_rate": 1e-05, + "loss": 0.223, + "step": 16550 + }, + { + "epoch": 0.05674614324974471, + "grad_norm": 1.0248748064041138, + "learning_rate": 1e-05, + "loss": 0.2392, + "step": 16560 + }, + { + "epoch": 0.05678041024446074, + "grad_norm": 1.0252872705459595, + "learning_rate": 1e-05, + "loss": 0.2426, + "step": 16570 + }, + { + "epoch": 0.05681467723917677, + "grad_norm": 1.1694291830062866, + "learning_rate": 1e-05, + "loss": 0.2078, + "step": 16580 + }, + { + "epoch": 0.0568489442338928, + "grad_norm": 1.152016520500183, + "learning_rate": 1e-05, + "loss": 0.2423, + "step": 16590 + }, + { + "epoch": 0.05688321122860883, + "grad_norm": 0.9418520927429199, + "learning_rate": 1e-05, + "loss": 0.2214, + "step": 16600 + }, + { + "epoch": 0.05691747822332486, + "grad_norm": 1.1249022483825684, + "learning_rate": 1e-05, + "loss": 0.2222, + "step": 16610 + }, + { + "epoch": 0.05695174521804089, + "grad_norm": 1.0502556562423706, + "learning_rate": 1e-05, + "loss": 0.2366, + "step": 16620 + }, + { + "epoch": 0.05698601221275692, + "grad_norm": 1.0879663228988647, + "learning_rate": 1e-05, + "loss": 0.2253, + "step": 16630 + }, + { + "epoch": 0.05702027920747295, + "grad_norm": 1.0701017379760742, + "learning_rate": 1e-05, + "loss": 0.2369, + "step": 16640 + }, + { + "epoch": 0.05705454620218898, + "grad_norm": 1.0117312669754028, + "learning_rate": 1e-05, + "loss": 0.2419, + "step": 16650 + }, + { + "epoch": 0.05708881319690501, + "grad_norm": 1.1599793434143066, + "learning_rate": 1e-05, + "loss": 0.2444, + "step": 16660 + }, + { + "epoch": 0.05712308019162103, + "grad_norm": 1.1197978258132935, + "learning_rate": 1e-05, + "loss": 0.2388, + "step": 16670 + }, + { + "epoch": 0.05715734718633706, + "grad_norm": 1.1201450824737549, + "learning_rate": 1e-05, + "loss": 0.2401, + "step": 16680 + }, + { + "epoch": 0.05719161418105309, + "grad_norm": 1.0935217142105103, + "learning_rate": 1e-05, + "loss": 0.2191, + "step": 16690 + }, + { + "epoch": 0.05722588117576912, + "grad_norm": 1.237023949623108, + "learning_rate": 1e-05, + "loss": 0.2509, + "step": 16700 + }, + { + "epoch": 0.05726014817048515, + "grad_norm": 1.0313085317611694, + "learning_rate": 1e-05, + "loss": 0.2271, + "step": 16710 + }, + { + "epoch": 0.05729441516520118, + "grad_norm": 1.2026563882827759, + "learning_rate": 1e-05, + "loss": 0.2344, + "step": 16720 + }, + { + "epoch": 0.05732868215991721, + "grad_norm": 1.2139136791229248, + "learning_rate": 1e-05, + "loss": 0.2425, + "step": 16730 + }, + { + "epoch": 0.05736294915463324, + "grad_norm": 1.06145179271698, + "learning_rate": 1e-05, + "loss": 0.2547, + "step": 16740 + }, + { + "epoch": 0.05739721614934927, + "grad_norm": 1.1620399951934814, + "learning_rate": 1e-05, + "loss": 0.228, + "step": 16750 + }, + { + "epoch": 0.0574314831440653, + "grad_norm": 1.0586286783218384, + "learning_rate": 1e-05, + "loss": 0.2202, + "step": 16760 + }, + { + "epoch": 0.05746575013878133, + "grad_norm": 1.0937540531158447, + "learning_rate": 1e-05, + "loss": 0.2415, + "step": 16770 + }, + { + "epoch": 0.05750001713349736, + "grad_norm": 1.0289047956466675, + "learning_rate": 1e-05, + "loss": 0.2327, + "step": 16780 + }, + { + "epoch": 0.05753428412821339, + "grad_norm": 1.0515446662902832, + "learning_rate": 1e-05, + "loss": 0.2306, + "step": 16790 + }, + { + "epoch": 0.05756855112292942, + "grad_norm": 0.9734529852867126, + "learning_rate": 1e-05, + "loss": 0.2196, + "step": 16800 + }, + { + "epoch": 0.05760281811764545, + "grad_norm": 1.0374795198440552, + "learning_rate": 1e-05, + "loss": 0.2263, + "step": 16810 + }, + { + "epoch": 0.05763708511236148, + "grad_norm": 1.0560572147369385, + "learning_rate": 1e-05, + "loss": 0.2466, + "step": 16820 + }, + { + "epoch": 0.0576713521070775, + "grad_norm": 0.9350127577781677, + "learning_rate": 1e-05, + "loss": 0.229, + "step": 16830 + }, + { + "epoch": 0.05770561910179353, + "grad_norm": 1.180124282836914, + "learning_rate": 1e-05, + "loss": 0.2395, + "step": 16840 + }, + { + "epoch": 0.05773988609650956, + "grad_norm": 1.17545747756958, + "learning_rate": 1e-05, + "loss": 0.2427, + "step": 16850 + }, + { + "epoch": 0.05777415309122559, + "grad_norm": 1.1822388172149658, + "learning_rate": 1e-05, + "loss": 0.2525, + "step": 16860 + }, + { + "epoch": 0.05780842008594162, + "grad_norm": 1.0005474090576172, + "learning_rate": 1e-05, + "loss": 0.2215, + "step": 16870 + }, + { + "epoch": 0.05784268708065765, + "grad_norm": 1.2070783376693726, + "learning_rate": 1e-05, + "loss": 0.2476, + "step": 16880 + }, + { + "epoch": 0.05787695407537368, + "grad_norm": 1.3960411548614502, + "learning_rate": 1e-05, + "loss": 0.2535, + "step": 16890 + }, + { + "epoch": 0.05791122107008971, + "grad_norm": 1.1246318817138672, + "learning_rate": 1e-05, + "loss": 0.2532, + "step": 16900 + }, + { + "epoch": 0.05794548806480574, + "grad_norm": 1.042833685874939, + "learning_rate": 1e-05, + "loss": 0.2453, + "step": 16910 + }, + { + "epoch": 0.05797975505952177, + "grad_norm": 1.1697344779968262, + "learning_rate": 1e-05, + "loss": 0.2396, + "step": 16920 + }, + { + "epoch": 0.0580140220542378, + "grad_norm": 1.1674904823303223, + "learning_rate": 1e-05, + "loss": 0.2413, + "step": 16930 + }, + { + "epoch": 0.05804828904895383, + "grad_norm": 1.2486639022827148, + "learning_rate": 1e-05, + "loss": 0.2397, + "step": 16940 + }, + { + "epoch": 0.05808255604366986, + "grad_norm": 1.0194487571716309, + "learning_rate": 1e-05, + "loss": 0.2323, + "step": 16950 + }, + { + "epoch": 0.05811682303838589, + "grad_norm": 1.1312390565872192, + "learning_rate": 1e-05, + "loss": 0.238, + "step": 16960 + }, + { + "epoch": 0.05815109003310192, + "grad_norm": 1.0825895071029663, + "learning_rate": 1e-05, + "loss": 0.2523, + "step": 16970 + }, + { + "epoch": 0.05818535702781795, + "grad_norm": 1.1746137142181396, + "learning_rate": 1e-05, + "loss": 0.2389, + "step": 16980 + }, + { + "epoch": 0.05821962402253397, + "grad_norm": 1.1029731035232544, + "learning_rate": 1e-05, + "loss": 0.2347, + "step": 16990 + }, + { + "epoch": 0.05825389101725, + "grad_norm": 1.1937546730041504, + "learning_rate": 1e-05, + "loss": 0.2492, + "step": 17000 + }, + { + "epoch": 0.05825389101725, + "eval_cer": 12.621273593226318, + "eval_loss": 0.24555271863937378, + "eval_normalized_cer": 9.272581934452438, + "eval_runtime": 227.9675, + "eval_samples_per_second": 2.246, + "eval_steps_per_second": 0.035, + "step": 17000 + } + ], + "logging_steps": 10, + "max_steps": 291826, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.44690352193536e+21, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +}