{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.999774113395076,
  "eval_steps": 1000,
  "global_step": 33201,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00903546419697312, "grad_norm": 7.14754056930542, "learning_rate": 6.8599999999999995e-06, "loss": 1.8388, "step": 100},
    {"epoch": 0.01807092839394624, "grad_norm": 6.974137783050537, "learning_rate": 1.386e-05, "loss": 1.2229, "step": 200},
    {"epoch": 0.02710639259091936, "grad_norm": 6.240023612976074, "learning_rate": 2.0859999999999997e-05, "loss": 0.9997, "step": 300},
    {"epoch": 0.03614185678789248, "grad_norm": 5.526381492614746, "learning_rate": 2.7859999999999998e-05, "loss": 0.8603, "step": 400},
    {"epoch": 0.0451773209848656, "grad_norm": 6.0143938064575195, "learning_rate": 3.4859999999999995e-05, "loss": 0.7584, "step": 500},
    {"epoch": 0.05421278518183872, "grad_norm": 5.181371212005615, "learning_rate": 4.1859999999999996e-05, "loss": 0.6841, "step": 600},
    {"epoch": 0.06324824937881184, "grad_norm": 4.984240531921387, "learning_rate": 4.885999999999999e-05, "loss": 0.644, "step": 700},
    {"epoch": 0.07228371357578496, "grad_norm": 5.0428266525268555, "learning_rate": 5.586e-05, "loss": 0.6025, "step": 800},
    {"epoch": 0.08131917777275807, "grad_norm": 4.736971378326416, "learning_rate": 6.285999999999999e-05, "loss": 0.5776, "step": 900},
    {"epoch": 0.0903546419697312, "grad_norm": 4.559544086456299, "learning_rate": 6.986e-05, "loss": 0.551, "step": 1000},
    {"epoch": 0.0903546419697312, "eval_loss": 0.2710006833076477, "eval_runtime": 89.4841, "eval_samples_per_second": 47.64, "eval_steps_per_second": 0.749, "eval_wer": 0.26942939113802994, "step": 1000},
    {"epoch": 0.09939010616670431, "grad_norm": 4.705749988555908, "learning_rate": 6.978696313779074e-05, "loss": 0.5356, "step": 1100},
    {"epoch": 0.10842557036367743, "grad_norm": 4.287839412689209, "learning_rate": 6.9569578584516e-05, "loss": 0.5058, "step": 1200},
    {"epoch": 0.11746103456065056, "grad_norm": 3.9484827518463135, "learning_rate": 6.935219403124125e-05, "loss": 0.4863, "step": 1300},
    {"epoch": 0.12649649875762367, "grad_norm": 4.207424640655518, "learning_rate": 6.913480947796651e-05, "loss": 0.468, "step": 1400},
    {"epoch": 0.1355319629545968, "grad_norm": 4.078378200531006, "learning_rate": 6.891742492469178e-05, "loss": 0.4522, "step": 1500},
    {"epoch": 0.14456742715156992, "grad_norm": 3.6946797370910645, "learning_rate": 6.870004037141703e-05, "loss": 0.4396, "step": 1600},
    {"epoch": 0.15360289134854302, "grad_norm": 3.742530345916748, "learning_rate": 6.848265581814229e-05, "loss": 0.4338, "step": 1700},
    {"epoch": 0.16263835554551614, "grad_norm": 4.0423078536987305, "learning_rate": 6.826527126486755e-05, "loss": 0.4232, "step": 1800},
    {"epoch": 0.17167381974248927, "grad_norm": 3.7348833084106445, "learning_rate": 6.80478867115928e-05, "loss": 0.4144, "step": 1900},
    {"epoch": 0.1807092839394624, "grad_norm": 3.4496703147888184, "learning_rate": 6.783050215831805e-05, "loss": 0.4016, "step": 2000},
    {"epoch": 0.1807092839394624, "eval_loss": 0.20093074440956116, "eval_runtime": 89.3016, "eval_samples_per_second": 47.737, "eval_steps_per_second": 0.75, "eval_wer": 0.20614174901710763, "step": 2000},
    {"epoch": 0.18974474813643552, "grad_norm": 3.3866732120513916, "learning_rate": 6.761311760504332e-05, "loss": 0.3858, "step": 2100},
    {"epoch": 0.19878021233340862, "grad_norm": 4.071012496948242, "learning_rate": 6.739573305176857e-05, "loss": 0.3875, "step": 2200},
    {"epoch": 0.20781567653038174, "grad_norm": 3.373796224594116, "learning_rate": 6.717834849849383e-05, "loss": 0.3795, "step": 2300},
    {"epoch": 0.21685114072735487, "grad_norm": 3.105025291442871, "learning_rate": 6.696096394521908e-05, "loss": 0.3787, "step": 2400},
    {"epoch": 0.225886604924328, "grad_norm": 3.8723206520080566, "learning_rate": 6.674357939194434e-05, "loss": 0.3716, "step": 2500},
    {"epoch": 0.23492206912130112, "grad_norm": 3.2043449878692627, "learning_rate": 6.65261948386696e-05, "loss": 0.3662, "step": 2600},
    {"epoch": 0.24395753331827422, "grad_norm": 3.2647688388824463, "learning_rate": 6.631098413092761e-05, "loss": 0.3567, "step": 2700},
    {"epoch": 0.25299299751524734, "grad_norm": 3.255851984024048, "learning_rate": 6.609359957765287e-05, "loss": 0.3541, "step": 2800},
    {"epoch": 0.26202846171222044, "grad_norm": 3.103607177734375, "learning_rate": 6.587621502437812e-05, "loss": 0.3551, "step": 2900},
    {"epoch": 0.2710639259091936, "grad_norm": 3.7592177391052246, "learning_rate": 6.565883047110337e-05, "loss": 0.3449, "step": 3000},
    {"epoch": 0.2710639259091936, "eval_loss": 0.17070473730564117, "eval_runtime": 88.3474, "eval_samples_per_second": 48.253, "eval_steps_per_second": 0.758, "eval_wer": 0.17702688343427903, "step": 3000},
    {"epoch": 0.2800993901061667, "grad_norm": 2.7764692306518555, "learning_rate": 6.544144591782863e-05, "loss": 0.3477, "step": 3100},
    {"epoch": 0.28913485430313984, "grad_norm": 2.980421543121338, "learning_rate": 6.522406136455388e-05, "loss": 0.3367, "step": 3200},
    {"epoch": 0.29817031850011294, "grad_norm": 3.0955636501312256, "learning_rate": 6.500667681127915e-05, "loss": 0.3347, "step": 3300},
    {"epoch": 0.30720578269708604, "grad_norm": 2.942781925201416, "learning_rate": 6.47892922580044e-05, "loss": 0.3363, "step": 3400},
    {"epoch": 0.3162412468940592, "grad_norm": 2.7990803718566895, "learning_rate": 6.457190770472966e-05, "loss": 0.3324, "step": 3500},
    {"epoch": 0.3252767110910323, "grad_norm": 3.0384480953216553, "learning_rate": 6.435452315145492e-05, "loss": 0.3273, "step": 3600},
    {"epoch": 0.33431217528800544, "grad_norm": 2.8415443897247314, "learning_rate": 6.413713859818017e-05, "loss": 0.3231, "step": 3700},
    {"epoch": 0.34334763948497854, "grad_norm": 2.706265687942505, "learning_rate": 6.391975404490544e-05, "loss": 0.3224, "step": 3800},
    {"epoch": 0.35238310368195164, "grad_norm": 2.77278995513916, "learning_rate": 6.370236949163069e-05, "loss": 0.32, "step": 3900},
    {"epoch": 0.3614185678789248, "grad_norm": 2.9242990016937256, "learning_rate": 6.348498493835595e-05, "loss": 0.3147, "step": 4000},
    {"epoch": 0.3614185678789248, "eval_loss": 0.1588164120912552, "eval_runtime": 89.3911, "eval_samples_per_second": 47.689, "eval_steps_per_second": 0.75, "eval_wer": 0.1649984061204973, "step": 4000},
    {"epoch": 0.3704540320758979, "grad_norm": 3.196282148361206, "learning_rate": 6.32676003850812e-05, "loss": 0.3112, "step": 4100},
    {"epoch": 0.37948949627287104, "grad_norm": 3.880776882171631, "learning_rate": 6.305021583180646e-05, "loss": 0.3154, "step": 4200},
    {"epoch": 0.38852496046984414, "grad_norm": 2.7569668292999268, "learning_rate": 6.283283127853171e-05, "loss": 0.3108, "step": 4300},
    {"epoch": 0.39756042466681724, "grad_norm": 2.951040267944336, "learning_rate": 6.261544672525697e-05, "loss": 0.3093, "step": 4400},
    {"epoch": 0.4065958888637904, "grad_norm": 2.667750358581543, "learning_rate": 6.239806217198222e-05, "loss": 0.3082, "step": 4500},
    {"epoch": 0.4156313530607635, "grad_norm": 2.872540235519409, "learning_rate": 6.218067761870749e-05, "loss": 0.3005, "step": 4600},
    {"epoch": 0.42466681725773664, "grad_norm": 3.15378999710083, "learning_rate": 6.196329306543275e-05, "loss": 0.2994, "step": 4700},
    {"epoch": 0.43370228145470974, "grad_norm": 2.879260301589966, "learning_rate": 6.1745908512158e-05, "loss": 0.2959, "step": 4800},
    {"epoch": 0.44273774565168283, "grad_norm": 2.811612367630005, "learning_rate": 6.152852395888326e-05, "loss": 0.2974, "step": 4900},
    {"epoch": 0.451773209848656, "grad_norm": 2.7307889461517334, "learning_rate": 6.131113940560851e-05, "loss": 0.2936, "step": 5000},
    {"epoch": 0.451773209848656, "eval_loss": 0.1471971571445465, "eval_runtime": 88.7501, "eval_samples_per_second": 48.034, "eval_steps_per_second": 0.755, "eval_wer": 0.1551376049304006, "step": 5000},
    {"epoch": 0.4608086740456291, "grad_norm": 2.734050750732422, "learning_rate": 6.109375485233378e-05, "loss": 0.2917, "step": 5100},
    {"epoch": 0.46984413824260224, "grad_norm": 2.650491952896118, "learning_rate": 6.0876370299059026e-05, "loss": 0.2929, "step": 5200},
    {"epoch": 0.47887960243957534, "grad_norm": 2.519413709640503, "learning_rate": 6.065898574578429e-05, "loss": 0.2919, "step": 5300},
    {"epoch": 0.48791506663654843, "grad_norm": 2.6014676094055176, "learning_rate": 6.0441601192509545e-05, "loss": 0.2811, "step": 5400},
    {"epoch": 0.4969505308335216, "grad_norm": 2.7325778007507324, "learning_rate": 6.02242166392348e-05, "loss": 0.2878, "step": 5500},
    {"epoch": 0.5059859950304947, "grad_norm": 2.636491298675537, "learning_rate": 6.000683208596006e-05, "loss": 0.2821, "step": 5600},
    {"epoch": 0.5150214592274678, "grad_norm": 2.6922860145568848, "learning_rate": 5.9789447532685315e-05, "loss": 0.2828, "step": 5700},
    {"epoch": 0.5240569234244409, "grad_norm": 2.4657480716705322, "learning_rate": 5.957206297941057e-05, "loss": 0.2845, "step": 5800},
    {"epoch": 0.5330923876214141, "grad_norm": 2.6574530601501465, "learning_rate": 5.935467842613583e-05, "loss": 0.28, "step": 5900},
    {"epoch": 0.5421278518183872, "grad_norm": 2.769786834716797, "learning_rate": 5.913729387286109e-05, "loss": 0.2758, "step": 6000},
    {"epoch": 0.5421278518183872, "eval_loss": 0.1405603438615799, "eval_runtime": 90.2531, "eval_samples_per_second": 47.234, "eval_steps_per_second": 0.742, "eval_wer": 0.14793326957815323, "step": 6000},
    {"epoch": 0.5511633160153603, "grad_norm": 2.6292548179626465, "learning_rate": 5.891990931958634e-05, "loss": 0.2744, "step": 6100},
    {"epoch": 0.5601987802123334, "grad_norm": 2.536770820617676, "learning_rate": 5.87025247663116e-05, "loss": 0.2735, "step": 6200},
    {"epoch": 0.5692342444093065, "grad_norm": 2.3336434364318848, "learning_rate": 5.848514021303685e-05, "loss": 0.2764, "step": 6300},
    {"epoch": 0.5782697086062797, "grad_norm": 2.677401542663574, "learning_rate": 5.8267755659762116e-05, "loss": 0.2761, "step": 6400},
    {"epoch": 0.5873051728032528, "grad_norm": 2.634038209915161, "learning_rate": 5.805037110648737e-05, "loss": 0.2694, "step": 6500},
    {"epoch": 0.5963406370002259, "grad_norm": 2.643404245376587, "learning_rate": 5.783298655321262e-05, "loss": 0.263, "step": 6600},
    {"epoch": 0.605376101197199, "grad_norm": 2.2921056747436523, "learning_rate": 5.7615601999937885e-05, "loss": 0.2738, "step": 6700},
    {"epoch": 0.6144115653941721, "grad_norm": 2.398670196533203, "learning_rate": 5.739821744666314e-05, "loss": 0.2682, "step": 6800},
    {"epoch": 0.6234470295911453, "grad_norm": 2.447571277618408, "learning_rate": 5.71808328933884e-05, "loss": 0.2653, "step": 6900},
    {"epoch": 0.6324824937881184, "grad_norm": 2.270413637161255, "learning_rate": 5.6963448340113654e-05, "loss": 0.2663, "step": 7000},
    {"epoch": 0.6324824937881184, "eval_loss": 0.13218513131141663, "eval_runtime": 89.0739, "eval_samples_per_second": 47.859, "eval_steps_per_second": 0.752, "eval_wer": 0.13926256508341303, "step": 7000},
    {"epoch": 0.6415179579850915, "grad_norm": 2.406534433364868, "learning_rate": 5.674606378683892e-05, "loss": 0.2701, "step": 7100},
    {"epoch": 0.6505534221820646, "grad_norm": 2.3954741954803467, "learning_rate": 5.652867923356417e-05, "loss": 0.2661, "step": 7200},
    {"epoch": 0.6595888863790377, "grad_norm": 2.3920400142669678, "learning_rate": 5.631129468028943e-05, "loss": 0.2662, "step": 7300},
    {"epoch": 0.6686243505760109, "grad_norm": 2.6168298721313477, "learning_rate": 5.6096083972547435e-05, "loss": 0.259, "step": 7400},
    {"epoch": 0.677659814772984, "grad_norm": 2.351517915725708, "learning_rate": 5.587869941927269e-05, "loss": 0.2531, "step": 7500},
    {"epoch": 0.6866952789699571, "grad_norm": 2.4925589561462402, "learning_rate": 5.566131486599794e-05, "loss": 0.2584, "step": 7600},
    {"epoch": 0.6957307431669302, "grad_norm": 2.465437650680542, "learning_rate": 5.5443930312723204e-05, "loss": 0.2572, "step": 7700},
    {"epoch": 0.7047662073639033, "grad_norm": 2.383103370666504, "learning_rate": 5.522654575944846e-05, "loss": 0.2541, "step": 7800},
    {"epoch": 0.7138016715608765, "grad_norm": 2.254746675491333, "learning_rate": 5.5009161206173716e-05, "loss": 0.2551, "step": 7900},
    {"epoch": 0.7228371357578496, "grad_norm": 2.601073980331421, "learning_rate": 5.479177665289897e-05, "loss": 0.2613, "step": 8000},
    {"epoch": 0.7228371357578496, "eval_loss": 0.1282639354467392, "eval_runtime": 89.4564, "eval_samples_per_second": 47.654, "eval_steps_per_second": 0.749, "eval_wer": 0.1401763893316332, "step": 8000},
    {"epoch": 0.7318725999548227, "grad_norm": 2.6043508052825928, "learning_rate": 5.4574392099624236e-05, "loss": 0.2527, "step": 8100},
    {"epoch": 0.7409080641517958, "grad_norm": 2.4817826747894287, "learning_rate": 5.4357007546349486e-05, "loss": 0.2531, "step": 8200},
    {"epoch": 0.7499435283487689, "grad_norm": 2.2043120861053467, "learning_rate": 5.413962299307475e-05, "loss": 0.2508, "step": 8300},
    {"epoch": 0.7589789925457421, "grad_norm": 2.436621904373169, "learning_rate": 5.39222384398e-05, "loss": 0.2524, "step": 8400},
    {"epoch": 0.7680144567427152, "grad_norm": 2.2948272228240967, "learning_rate": 5.3704853886525255e-05, "loss": 0.2511, "step": 8500},
    {"epoch": 0.7770499209396883, "grad_norm": 2.516068935394287, "learning_rate": 5.348746933325052e-05, "loss": 0.2503, "step": 8600},
    {"epoch": 0.7860853851366614, "grad_norm": 2.286062002182007, "learning_rate": 5.327008477997577e-05, "loss": 0.249, "step": 8700},
    {"epoch": 0.7951208493336345, "grad_norm": 2.2099480628967285, "learning_rate": 5.305270022670103e-05, "loss": 0.2476, "step": 8800},
    {"epoch": 0.8041563135306077, "grad_norm": 2.279094934463501, "learning_rate": 5.283531567342629e-05, "loss": 0.2477, "step": 8900},
    {"epoch": 0.8131917777275808, "grad_norm": 2.5608932971954346, "learning_rate": 5.2617931120151544e-05, "loss": 0.2491, "step": 9000},
    {"epoch": 0.8131917777275808, "eval_loss": 0.12159302085638046, "eval_runtime": 88.1859, "eval_samples_per_second": 48.341, "eval_steps_per_second": 0.76, "eval_wer": 0.1319094676442461, "step": 9000},
    {"epoch": 0.8222272419245539, "grad_norm": 2.8134467601776123, "learning_rate": 5.24005465668768e-05, "loss": 0.2393, "step": 9100},
    {"epoch": 0.831262706121527, "grad_norm": 2.109177589416504, "learning_rate": 5.218316201360206e-05, "loss": 0.247, "step": 9200},
    {"epoch": 0.8402981703185001, "grad_norm": 2.333599090576172, "learning_rate": 5.196577746032731e-05, "loss": 0.2396, "step": 9300},
    {"epoch": 0.8493336345154733, "grad_norm": 2.263291120529175, "learning_rate": 5.174839290705257e-05, "loss": 0.2454, "step": 9400},
    {"epoch": 0.8583690987124464, "grad_norm": 2.1932239532470703, "learning_rate": 5.153100835377783e-05, "loss": 0.2441, "step": 9500},
    {"epoch": 0.8674045629094195, "grad_norm": 2.3545312881469727, "learning_rate": 5.131362380050308e-05, "loss": 0.2388, "step": 9600},
    {"epoch": 0.8764400271063926, "grad_norm": 1.9302074909210205, "learning_rate": 5.1096239247228345e-05, "loss": 0.2386, "step": 9700},
    {"epoch": 0.8854754913033657, "grad_norm": 2.2227907180786133, "learning_rate": 5.0878854693953595e-05, "loss": 0.245, "step": 9800},
    {"epoch": 0.8945109555003389, "grad_norm": 2.0656354427337646, "learning_rate": 5.066147014067886e-05, "loss": 0.2341, "step": 9900},
    {"epoch": 0.903546419697312, "grad_norm": 2.062394142150879, "learning_rate": 5.0444085587404114e-05, "loss": 0.238, "step": 10000},
    {"epoch": 0.903546419697312, "eval_loss": 0.11923061311244965, "eval_runtime": 88.4115, "eval_samples_per_second": 48.218, "eval_steps_per_second": 0.758, "eval_wer": 0.1290829879927744, "step": 10000},
    {"epoch": 0.9125818838942851, "grad_norm": 2.264702081680298, "learning_rate": 5.022670103412938e-05, "loss": 0.2386, "step": 10100},
    {"epoch": 0.9216173480912582, "grad_norm": 2.0281338691711426, "learning_rate": 5.000931648085463e-05, "loss": 0.2374, "step": 10200},
    {"epoch": 0.9306528122882313, "grad_norm": 2.0940310955047607, "learning_rate": 4.9791931927579883e-05, "loss": 0.2349, "step": 10300},
    {"epoch": 0.9396882764852045, "grad_norm": 2.1335864067077637, "learning_rate": 4.957454737430514e-05, "loss": 0.2326, "step": 10400},
    {"epoch": 0.9487237406821776, "grad_norm": 2.3644163608551025, "learning_rate": 4.9357162821030396e-05, "loss": 0.2314, "step": 10500},
    {"epoch": 0.9577592048791507, "grad_norm": 2.029175043106079, "learning_rate": 4.91419521132884e-05, "loss": 0.2363, "step": 10600},
    {"epoch": 0.9667946690761238, "grad_norm": 2.630101203918457, "learning_rate": 4.8924567560013664e-05, "loss": 0.2298, "step": 10700},
    {"epoch": 0.9758301332730969, "grad_norm": 2.356724500656128, "learning_rate": 4.870718300673891e-05, "loss": 0.2269, "step": 10800},
    {"epoch": 0.9848655974700701, "grad_norm": 2.1543145179748535, "learning_rate": 4.8489798453464176e-05, "loss": 0.2377, "step": 10900},
    {"epoch": 0.9939010616670432, "grad_norm": 2.399824857711792, "learning_rate": 4.827241390018943e-05, "loss": 0.2287, "step": 11000},
    {"epoch": 0.9939010616670432, "eval_loss": 0.11506820470094681, "eval_runtime": 89.5431, "eval_samples_per_second": 47.608, "eval_steps_per_second": 0.748, "eval_wer": 0.1275528636701732, "step": 11000},
    {"epoch": 1.0028913485430313, "grad_norm": 2.18354868888855, "learning_rate": 4.805502934691468e-05, "loss": 0.2129, "step": 11100},
    {"epoch": 1.0119268127400045, "grad_norm": 2.018084764480591, "learning_rate": 4.7837644793639945e-05, "loss": 0.1792, "step": 11200},
    {"epoch": 1.0209622769369777, "grad_norm": 2.1397042274475098, "learning_rate": 4.76202602403652e-05, "loss": 0.1794, "step": 11300},
    {"epoch": 1.0299977411339507, "grad_norm": 1.925986886024475, "learning_rate": 4.740287568709046e-05, "loss": 0.1816, "step": 11400},
    {"epoch": 1.039033205330924, "grad_norm": 2.0704362392425537, "learning_rate": 4.7185491133815715e-05, "loss": 0.1767, "step": 11500},
    {"epoch": 1.048068669527897, "grad_norm": 1.8338583707809448, "learning_rate": 4.696810658054098e-05, "loss": 0.1767, "step": 11600},
    {"epoch": 1.05710413372487, "grad_norm": 1.9655053615570068, "learning_rate": 4.675072202726623e-05, "loss": 0.1814, "step": 11700},
    {"epoch": 1.0661395979218433, "grad_norm": 1.880100965499878, "learning_rate": 4.653333747399149e-05, "loss": 0.1786, "step": 11800},
    {"epoch": 1.0751750621188163, "grad_norm": 2.52089524269104, "learning_rate": 4.631595292071674e-05, "loss": 0.1796, "step": 11900},
    {"epoch": 1.0842105263157895, "grad_norm": 2.179574728012085, "learning_rate": 4.6098568367441997e-05, "loss": 0.1798, "step": 12000},
    {"epoch": 1.0842105263157895, "eval_loss": 0.11312589794397354, "eval_runtime": 89.9356, "eval_samples_per_second": 47.401, "eval_steps_per_second": 0.745, "eval_wer": 0.12343002868983105, "step": 12000},
    {"epoch": 1.0932459905127625, "grad_norm": 2.3577959537506104, "learning_rate": 4.588118381416726e-05, "loss": 0.1834, "step": 12100},
    {"epoch": 1.1022814547097357, "grad_norm": 2.2626988887786865, "learning_rate": 4.566379926089251e-05, "loss": 0.1792, "step": 12200},
    {"epoch": 1.111316918906709, "grad_norm": 2.0373926162719727, "learning_rate": 4.544641470761777e-05, "loss": 0.1773, "step": 12300},
    {"epoch": 1.120352383103682, "grad_norm": 1.8774733543395996, "learning_rate": 4.522903015434303e-05, "loss": 0.1763, "step": 12400},
    {"epoch": 1.1293878473006551, "grad_norm": 2.0867061614990234, "learning_rate": 4.5011645601068285e-05, "loss": 0.1775, "step": 12500},
    {"epoch": 1.138423311497628, "grad_norm": 1.822313904762268, "learning_rate": 4.479426104779354e-05, "loss": 0.182, "step": 12600},
    {"epoch": 1.1474587756946013, "grad_norm": 1.9483801126480103, "learning_rate": 4.4579050340051546e-05, "loss": 0.1801, "step": 12700},
    {"epoch": 1.1564942398915745, "grad_norm": 1.7819561958312988, "learning_rate": 4.436166578677681e-05, "loss": 0.175, "step": 12800},
    {"epoch": 1.1655297040885475, "grad_norm": 2.2512149810791016, "learning_rate": 4.414428123350206e-05, "loss": 0.1771, "step": 12900},
    {"epoch": 1.1745651682855207, "grad_norm": 2.0755016803741455, "learning_rate": 4.3926896680227315e-05, "loss": 0.1791, "step": 13000},
    {"epoch": 1.1745651682855207, "eval_loss": 0.1113397553563118, "eval_runtime": 89.8896, "eval_samples_per_second": 47.425, "eval_steps_per_second": 0.745, "eval_wer": 0.11858463500159389, "step": 13000},
    {"epoch": 1.1836006324824937, "grad_norm": 1.8246344327926636, "learning_rate": 4.370951212695258e-05, "loss": 0.1826, "step": 13100},
    {"epoch": 1.192636096679467, "grad_norm": 2.0341689586639404, "learning_rate": 4.349212757367783e-05, "loss": 0.1795, "step": 13200},
    {"epoch": 1.2016715608764401, "grad_norm": 1.8964906930923462, "learning_rate": 4.327474302040309e-05, "loss": 0.1777, "step": 13300},
    {"epoch": 1.210707025073413, "grad_norm": 1.9983662366867065, "learning_rate": 4.305735846712835e-05, "loss": 0.1777, "step": 13400},
    {"epoch": 1.2197424892703863, "grad_norm": 1.9901524782180786, "learning_rate": 4.2839973913853604e-05, "loss": 0.1745, "step": 13500},
    {"epoch": 1.2287779534673593, "grad_norm": 2.0231523513793945, "learning_rate": 4.262258936057886e-05, "loss": 0.183, "step": 13600},
    {"epoch": 1.2378134176643325, "grad_norm": 2.097205877304077, "learning_rate": 4.240520480730412e-05, "loss": 0.1795, "step": 13700},
    {"epoch": 1.2468488818613057, "grad_norm": 1.8367393016815186, "learning_rate": 4.218782025402937e-05, "loss": 0.1746, "step": 13800},
    {"epoch": 1.2558843460582787, "grad_norm": 2.2997806072235107, "learning_rate": 4.197043570075463e-05, "loss": 0.1781, "step": 13900},
    {"epoch": 1.264919810255252, "grad_norm": 1.9972946643829346, "learning_rate": 4.1753051147479886e-05, "loss": 0.1787, "step": 14000},
    {"epoch": 1.264919810255252, "eval_loss": 0.10852447897195816, "eval_runtime": 88.2121, "eval_samples_per_second": 48.327, "eval_steps_per_second": 0.76, "eval_wer": 0.11862713845499948, "step": 14000},
    {"epoch": 1.273955274452225, "grad_norm": 1.9734628200531006, "learning_rate": 4.153566659420514e-05, "loss": 0.178, "step": 14100},
    {"epoch": 1.282990738649198, "grad_norm": 2.0544159412384033, "learning_rate": 4.1318282040930405e-05, "loss": 0.1704, "step": 14200},
    {"epoch": 1.2920262028461713, "grad_norm": 1.8968679904937744, "learning_rate": 4.1100897487655655e-05, "loss": 0.1772, "step": 14300},
    {"epoch": 1.3010616670431443, "grad_norm": 1.8103258609771729, "learning_rate": 4.088351293438092e-05, "loss": 0.179, "step": 14400},
    {"epoch": 1.3100971312401175, "grad_norm": 1.9365414381027222, "learning_rate": 4.0666128381106174e-05, "loss": 0.1775, "step": 14500},
    {"epoch": 1.3191325954370905, "grad_norm": 1.9121586084365845, "learning_rate": 4.044874382783143e-05, "loss": 0.1772, "step": 14600},
    {"epoch": 1.3281680596340637, "grad_norm": 2.0764715671539307, "learning_rate": 4.023135927455669e-05, "loss": 0.1719, "step": 14700},
    {"epoch": 1.337203523831037, "grad_norm": 1.9687429666519165, "learning_rate": 4.0013974721281944e-05, "loss": 0.1735, "step": 14800},
    {"epoch": 1.34623898802801, "grad_norm": 2.0690395832061768, "learning_rate": 3.97965901680072e-05, "loss": 0.1797, "step": 14900},
    {"epoch": 1.355274452224983, "grad_norm": 2.121548891067505, "learning_rate": 3.9579205614732456e-05, "loss": 0.1771, "step": 15000},
    {"epoch": 1.355274452224983, "eval_loss": 0.10677234828472137, "eval_runtime": 88.6946, "eval_samples_per_second": 48.064, "eval_steps_per_second": 0.755, "eval_wer": 0.11541812772287749, "step": 15000},
    {"epoch": 1.364309916421956, "grad_norm": 2.3323662281036377, "learning_rate": 3.936182106145772e-05, "loss": 0.173, "step": 15100},
    {"epoch": 1.3733453806189293, "grad_norm": 2.262308359146118, "learning_rate": 3.914443650818297e-05, "loss": 0.1723, "step": 15200},
    {"epoch": 1.3823808448159025, "grad_norm": 2.0854151248931885, "learning_rate": 3.892705195490823e-05, "loss": 0.1753, "step": 15300},
    {"epoch": 1.3914163090128755, "grad_norm": 2.0246262550354004, "learning_rate": 3.870966740163348e-05, "loss": 0.1742, "step": 15400},
    {"epoch": 1.4004517732098487, "grad_norm": 2.0298593044281006, "learning_rate": 3.8492282848358745e-05, "loss": 0.1727, "step": 15500},
    {"epoch": 1.4094872374068217, "grad_norm": 1.8497194051742554, "learning_rate": 3.8274898295084e-05, "loss": 0.1738, "step": 15600},
    {"epoch": 1.418522701603795, "grad_norm": 2.052497386932373, "learning_rate": 3.805751374180925e-05, "loss": 0.1719, "step": 15700},
    {"epoch": 1.427558165800768, "grad_norm": 1.948426604270935, "learning_rate": 3.7840129188534514e-05, "loss": 0.1692, "step": 15800},
    {"epoch": 1.436593629997741, "grad_norm": 2.078310012817383, "learning_rate": 3.762274463525977e-05, "loss": 0.1736, "step": 15900},
    {"epoch": 1.4456290941947143, "grad_norm": 1.8413662910461426, "learning_rate": 3.740536008198503e-05, "loss": 0.1728, "step": 16000},
    {"epoch": 1.4456290941947143, "eval_loss": 0.10456942021846771, "eval_runtime": 88.873, "eval_samples_per_second": 47.967, "eval_steps_per_second": 0.754, "eval_wer": 0.11354797577303156, "step": 16000},
    {"epoch": 1.4546645583916873, "grad_norm": 1.894006371498108, "learning_rate": 3.7187975528710283e-05, "loss": 0.1737, "step": 16100},
    {"epoch": 1.4637000225886605, "grad_norm": 2.0090203285217285, "learning_rate": 3.6970590975435547e-05, "loss": 0.1723, "step": 16200},
    {"epoch": 1.4727354867856337, "grad_norm": 1.896735668182373, "learning_rate": 3.6753206422160796e-05, "loss": 0.1744, "step": 16300},
    {"epoch": 1.4817709509826067, "grad_norm": 1.9422425031661987, "learning_rate": 3.653582186888606e-05, "loss": 0.1662, "step": 16400},
    {"epoch": 1.49080641517958, "grad_norm": 2.205997943878174, "learning_rate": 3.6318437315611316e-05, "loss": 0.1726, "step": 16500},
    {"epoch": 1.4998418793765529, "grad_norm": 2.2248659133911133, "learning_rate": 3.6101052762336565e-05, "loss": 0.1739, "step": 16600},
    {"epoch": 1.508877343573526, "grad_norm": 1.9154504537582397, "learning_rate": 3.588366820906183e-05, "loss": 0.1751, "step": 16700},
    {"epoch": 1.5179128077704993, "grad_norm": 3.7510364055633545, "learning_rate": 3.566845750131983e-05, "loss": 0.1691, "step": 16800},
    {"epoch": 1.5269482719674723, "grad_norm": 1.9326035976409912, "learning_rate": 3.545107294804509e-05, "loss": 0.1736, "step": 16900},
    {"epoch": 1.5359837361644455, "grad_norm": 2.1534535884857178, "learning_rate": 3.5233688394770345e-05, "loss": 0.1714, "step": 17000},
    {"epoch": 1.5359837361644455, "eval_loss": 0.10288450121879578, "eval_runtime": 88.7852, "eval_samples_per_second": 48.015, "eval_steps_per_second": 0.755, "eval_wer": 0.11522686218255233, "step": 17000},
    {"epoch": 1.5450192003614185, "grad_norm": 2.0503385066986084, "learning_rate": 3.50163038414956e-05, "loss": 0.1697, "step": 17100},
    {"epoch": 1.5540546645583917, "grad_norm": 2.1852426528930664, "learning_rate": 3.479891928822086e-05, "loss": 0.1687, "step": 17200},
    {"epoch": 1.563090128755365, "grad_norm": 1.9237619638442993, "learning_rate": 3.4581534734946115e-05, "loss": 0.1699, "step": 17300},
    {"epoch": 1.572125592952338, "grad_norm": 1.9139324426651, "learning_rate": 3.436415018167137e-05, "loss": 0.1721, "step": 17400},
    {"epoch": 1.581161057149311, "grad_norm": 1.8762294054031372, "learning_rate": 3.414676562839663e-05, "loss": 0.1682, "step": 17500},
    {"epoch": 1.590196521346284, "grad_norm": 1.6753225326538086, "learning_rate": 3.392938107512189e-05, "loss": 0.1648, "step": 17600},
    {"epoch": 1.5992319855432573, "grad_norm": 2.4316673278808594, "learning_rate": 3.371199652184715e-05, "loss": 0.1701, "step": 17700},
    {"epoch": 1.6082674497402305, "grad_norm": 1.9219187498092651, "learning_rate": 3.34946119685724e-05, "loss": 0.1669, "step": 17800},
    {"epoch": 1.6173029139372035, "grad_norm": 1.6715503931045532, "learning_rate": 3.327722741529766e-05, "loss": 0.1675, "step": 17900},
    {"epoch": 1.6263383781341767, "grad_norm": 1.9405934810638428, "learning_rate": 3.3059842862022916e-05, "loss": 0.1706, "step": 18000},
    {"epoch": 1.6263383781341767, "eval_loss": 0.10067987442016602, "eval_runtime": 89.3754, "eval_samples_per_second": 47.698, "eval_steps_per_second": 0.75, "eval_wer": 0.11174157900329401, "step": 18000},
    {"epoch": 1.6353738423311497, "grad_norm": 2.1481971740722656, "learning_rate": 3.284245830874817e-05, "loss": 0.1668, "step": 18100},
    {"epoch": 1.644409306528123, "grad_norm": 2.29831600189209, "learning_rate": 3.262507375547343e-05, "loss": 0.1683, "step": 18200},
    {"epoch": 1.653444770725096, "grad_norm": 1.698500633239746, "learning_rate": 3.2407689202198685e-05, "loss": 0.1651, "step": 18300},
    {"epoch": 1.662480234922069, "grad_norm": 2.0010197162628174, "learning_rate": 3.219030464892394e-05, "loss": 0.1647, "step": 18400},
    {"epoch": 1.671515699119042, "grad_norm": 1.8577830791473389, "learning_rate": 3.19729200956492e-05, "loss": 0.1649, "step": 18500},
    {"epoch": 1.6805511633160153, "grad_norm": 2.0325686931610107, "learning_rate": 3.175553554237446e-05, "loss": 0.1664, "step": 18600},
    {"epoch": 1.6895866275129885, "grad_norm": 1.8574236631393433, "learning_rate": 3.153815098909972e-05, "loss": 0.1646, "step": 18700},
    {"epoch": 1.6986220917099617, "grad_norm": 1.94573175907135, "learning_rate": 3.1320766435824974e-05, "loss": 0.1623, "step": 18800},
    {"epoch": 1.7076575559069347, "grad_norm": 1.9908078908920288, "learning_rate": 3.1103381882550224e-05, "loss": 0.1632, "step": 18900},
    {"epoch": 1.7166930201039077, "grad_norm": 1.7018805742263794, "learning_rate": 3.088599732927549e-05, "loss": 0.163, "step": 19000},
    {"epoch": 1.7166930201039077, "eval_loss": 0.09983944892883301, "eval_runtime": 88.5039, "eval_samples_per_second": 48.167, "eval_steps_per_second": 0.757, "eval_wer": 0.10740622675592391, "step": 19000},
    {"epoch": 1.7257284843008809, "grad_norm": 1.8709958791732788, "learning_rate": 3.066861277600074e-05, "loss": 0.163, "step": 19100},
    {"epoch": 1.734763948497854, "grad_norm": 2.1051034927368164, "learning_rate": 3.0451228222726e-05, "loss": 0.1632, "step": 19200},
    {"epoch": 1.7437994126948273, "grad_norm": 2.1160008907318115, "learning_rate": 3.0233843669451256e-05, "loss": 0.1677, "step": 19300},
    {"epoch": 1.7528348768918003, "grad_norm": 1.7885472774505615, "learning_rate": 3.0016459116176512e-05, "loss": 0.1628, "step": 19400},
    {"epoch": 1.7618703410887733, "grad_norm": 1.7749061584472656, "learning_rate": 2.9799074562901772e-05, "loss": 0.1623, "step": 19500},
    {"epoch": 1.7709058052857465, "grad_norm": 1.933435320854187, "learning_rate": 2.958169000962703e-05, "loss": 0.1639, "step": 19600},
    {"epoch": 1.7799412694827197, "grad_norm": 1.7979782819747925, "learning_rate": 2.9364305456352285e-05, "loss": 0.1581, "step": 19700},
    {"epoch": 1.788976733679693, "grad_norm": 1.9905706644058228, "learning_rate": 2.914692090307754e-05, "loss": 0.1623, "step": 19800},
    {"epoch": 1.7980121978766659, "grad_norm": 2.146162271499634, "learning_rate": 2.8929536349802798e-05, "loss": 0.1632, "step": 19900},
    {"epoch": 1.8070476620736389, "grad_norm": 1.861401081085205, "learning_rate": 2.8712151796528054e-05, "loss": 0.1613, "step": 20000},
    {"epoch": 1.8070476620736389, "eval_loss": 0.09824151545763016, "eval_runtime": 87.8053, "eval_samples_per_second": 48.551, "eval_steps_per_second": 0.763, "eval_wer": 0.10753373711614068, "step": 20000},
    {"epoch": 1.816083126270612, "grad_norm": 1.8411866426467896, "learning_rate": 2.849476724325331e-05, "loss": 0.165, "step": 20100},
    {"epoch": 1.8251185904675853, "grad_norm": 1.7575931549072266, "learning_rate": 2.827738268997857e-05, "loss": 0.1564, "step": 20200},
    {"epoch": 1.8341540546645585, "grad_norm": 2.028254985809326, "learning_rate": 2.8059998136703827e-05, "loss": 0.1589, "step": 20300},
    {"epoch": 1.8431895188615315, "grad_norm": 1.9810631275177002, "learning_rate": 2.7842613583429083e-05, "loss": 0.1586, "step": 20400},
    {"epoch": 1.8522249830585045, "grad_norm": 1.8610142469406128, "learning_rate": 2.7625229030154343e-05, "loss": 0.1602, "step": 20500},
    {"epoch": 1.8612604472554777, "grad_norm": 1.9897997379302979, "learning_rate": 2.74078444768796e-05, "loss": 0.1625, "step": 20600},
    {"epoch": 1.8702959114524509, "grad_norm": 1.7494564056396484, "learning_rate": 2.7190459923604856e-05, "loss": 0.1593, "step": 20700},
    {"epoch": 1.879331375649424, "grad_norm": 1.9486002922058105, "learning_rate": 2.6975249215862856e-05, "loss": 0.1595, "step": 20800},
    {"epoch": 1.888366839846397, "grad_norm": 1.950518012046814, "learning_rate": 2.6757864662588116e-05, "loss": 0.1619, "step": 20900},
    {"epoch": 1.89740230404337, "grad_norm": 1.9625803232192993, "learning_rate": 2.6540480109313373e-05, "loss": 0.1568, "step": 21000},
    {"epoch": 1.89740230404337, "eval_loss": 0.09674616158008575, "eval_runtime": 88.8971, "eval_samples_per_second": 47.954, "eval_steps_per_second": 0.754, "eval_wer": 0.10868133035809159, "step": 21000},
    {"epoch": 1.9064377682403433, "grad_norm": 1.7447710037231445, "learning_rate": 2.632309555603863e-05, "loss": 0.1566, "step": 21100},
    {"epoch": 1.9154732324373165, "grad_norm": 2.0597004890441895, "learning_rate": 2.610571100276389e-05, "loss": 0.1594, "step": 21200},
    {"epoch": 1.9245086966342897, "grad_norm": 2.045921802520752, "learning_rate": 2.5888326449489145e-05, "loss": 0.1592, "step": 21300},
    {"epoch": 1.9335441608312627, "grad_norm": 1.9995648860931396, "learning_rate": 2.56709418962144e-05, "loss": 0.1591, "step": 21400},
    {"epoch": 1.9425796250282357, "grad_norm": 1.765527367591858, "learning_rate": 2.5455731188472406e-05, "loss": 0.1578, "step": 21500},
    {"epoch": 1.9516150892252089, "grad_norm": 1.8758126497268677, "learning_rate": 2.5238346635197665e-05, "loss": 0.1577, "step": 21600},
    {"epoch": 1.960650553422182, "grad_norm": 1.770780324935913, "learning_rate": 2.502096208192292e-05, "loss": 0.1584, "step": 21700},
    {"epoch": 1.9696860176191553, "grad_norm": 1.8630551099777222, "learning_rate": 2.4803577528648175e-05, "loss": 0.1548, "step": 21800},
    {"epoch": 1.9787214818161283, "grad_norm": 1.8517158031463623, "learning_rate": 2.458619297537343e-05, "loss": 0.1593, "step": 21900},
    {"epoch": 1.9877569460131013, "grad_norm": 1.6973580121994019, "learning_rate": 2.436880842209869e-05, "loss": 0.1525, "step": 22000},
    {"epoch": 1.9877569460131013, "eval_loss": 0.0945153757929802, "eval_runtime": 87.5175, "eval_samples_per_second": 48.71, "eval_steps_per_second": 0.766, "eval_wer": 0.10449474019764106, "step": 22000},
    {"epoch": 1.9967924102100745, "grad_norm": 2.0748767852783203, "learning_rate": 2.4151423868823947e-05, "loss": 0.1573, "step": 22100},
    {"epoch": 2.0057826970860626, "grad_norm": 1.6151518821716309, "learning_rate": 2.3934039315549204e-05, "loss": 0.1241, "step": 22200},
    {"epoch": 2.014818161283036, "grad_norm": 1.5904980897903442, "learning_rate": 2.3716654762274464e-05, "loss": 0.1074, "step": 22300},
    {"epoch": 2.023853625480009, "grad_norm": 1.4857326745986938, "learning_rate": 2.349927020899972e-05, "loss": 0.1029, "step": 22400},
    {"epoch": 2.0328890896769822, "grad_norm": 1.7787961959838867, "learning_rate": 2.3281885655724976e-05, "loss": 0.1066, "step": 22500},
    {"epoch": 2.0419245538739554, "grad_norm": 1.6591817140579224, "learning_rate": 2.3066674947982977e-05, "loss": 0.1057, "step": 22600},
    {"epoch": 2.050960018070928, "grad_norm": 1.6939488649368286, "learning_rate": 2.2849290394708237e-05, "loss": 0.1051, "step": 22700},
    {"epoch": 2.0599954822679014, "grad_norm": 1.5981281995773315, "learning_rate": 2.2631905841433493e-05, "loss": 0.1036, "step": 22800},
    {"epoch": 2.0690309464648746, "grad_norm": 1.8668162822723389, "learning_rate": 2.241452128815875e-05, "loss": 0.1063, "step": 22900},
    {"epoch": 2.078066410661848, "grad_norm": 1.627382755279541, "learning_rate": 2.219713673488401e-05, "loss": 0.1063, "step": 23000},
    {"epoch": 2.078066410661848, "eval_loss": 0.0966850146651268, "eval_runtime": 88.5935, "eval_samples_per_second": 48.119, "eval_steps_per_second": 0.756, "eval_wer": 0.10462225055785783, "step": 23000},
    {"epoch": 2.087101874858821, "grad_norm": 1.6317180395126343, "learning_rate": 2.1979752181609266e-05, "loss": 0.1067, "step": 23100},
    {"epoch": 2.096137339055794, "grad_norm": 1.5637694597244263, "learning_rate": 2.1762367628334522e-05, "loss": 0.1061, "step": 23200},
    {"epoch": 2.105172803252767, "grad_norm": 1.561661720275879, "learning_rate": 2.154498307505978e-05, "loss": 0.1066, "step": 23300},
    {"epoch": 2.11420826744974, "grad_norm": 1.570977807044983, "learning_rate": 2.132759852178504e-05, "loss": 0.1057, "step": 23400},
    {"epoch": 2.1232437316467134, "grad_norm": 1.6354864835739136, "learning_rate": 2.111021396851029e-05, "loss": 0.1061, "step": 23500},
    {"epoch": 2.1322791958436866, "grad_norm": 1.6001309156417847, "learning_rate": 2.0892829415235548e-05, "loss": 0.1038, "step": 23600},
    {"epoch": 2.1413146600406594, "grad_norm": 1.7492948770523071, "learning_rate": 2.0675444861960808e-05, "loss": 0.1051, "step": 23700},
    {"epoch": 2.1503501242376326, "grad_norm": 1.7432228326797485, "learning_rate": 2.0458060308686064e-05, "loss": 0.1029, "step": 23800},
    {"epoch": 2.159385588434606, "grad_norm": 1.5974751710891724, "learning_rate": 2.024067575541132e-05, "loss": 0.1061, "step": 23900},
    {"epoch": 2.168421052631579, "grad_norm": 1.8045574426651, "learning_rate": 2.0023291202136577e-05, "loss": 0.1075, "step": 24000},
    {"epoch": 2.168421052631579, "eval_loss": 0.0951407328248024, "eval_runtime": 88.9045, "eval_samples_per_second": 47.95, "eval_steps_per_second": 0.754, "eval_wer": 0.10304962278185102, "step": 24000},
    {"epoch": 2.1774565168285522, "grad_norm": 1.6032062768936157, "learning_rate": 1.9805906648861836e-05, "loss": 0.1065, "step": 24100},
    {"epoch": 2.186491981025525, "grad_norm": 1.5442743301391602, "learning_rate": 1.9588522095587093e-05, "loss": 0.1063, "step": 24200},
    {"epoch": 2.195527445222498, "grad_norm": 1.6346817016601562, "learning_rate": 1.937113754231235e-05, "loss": 0.1036, "step": 24300},
    {"epoch": 2.2045629094194714, "grad_norm": 1.6535338163375854, "learning_rate": 1.9153752989037606e-05, "loss": 0.1051, "step": 24400},
    {"epoch": 2.2135983736164446, "grad_norm": 1.6055641174316406, "learning_rate": 1.8936368435762862e-05, "loss": 0.1064, "step": 24500},
    {"epoch": 2.222633837813418, "grad_norm": 1.936577558517456, "learning_rate": 1.871898388248812e-05, "loss": 0.1045, "step": 24600},
    {"epoch": 2.2316693020103906, "grad_norm": 1.58518385887146, "learning_rate": 1.8501599329213375e-05, "loss": 0.1071, "step": 24700},
    {"epoch": 2.240704766207364, "grad_norm": 1.73505437374115, "learning_rate": 1.8284214775938635e-05, "loss": 0.1065, "step": 24800},
    {"epoch": 2.249740230404337, "grad_norm": 1.7908620834350586, "learning_rate": 1.806683022266389e-05, "loss": 0.1065, "step": 24900},
    {"epoch": 2.2587756946013102, "grad_norm": 1.654637336730957, "learning_rate": 1.7849445669389147e-05, "loss": 0.1035, "step": 25000},
    {"epoch": 2.2587756946013102, "eval_loss": 0.09359237551689148, "eval_runtime": 90.6143, "eval_samples_per_second": 47.046, "eval_steps_per_second": 0.739, "eval_wer": 0.10149824673254702, "step": 25000},
    {"epoch": 2.2678111587982834, "grad_norm": 1.6015100479125977, "learning_rate": 1.7632061116114407e-05, "loss": 0.1062, "step": 25100},
    {"epoch": 2.276846622995256, "grad_norm": 1.6547913551330566, "learning_rate": 1.741467656283966e-05, "loss": 0.1053, "step": 25200},
    {"epoch": 2.2858820871922294, "grad_norm": 1.7010306119918823, "learning_rate": 1.719729200956492e-05, "loss": 0.1041, "step": 25300},
    {"epoch": 2.2949175513892026, "grad_norm": 1.8139252662658691, "learning_rate": 1.6979907456290176e-05, "loss": 0.103, "step": 25400},
    {"epoch": 2.303953015586176, "grad_norm": 1.6318985223770142, "learning_rate": 1.6762522903015433e-05, "loss": 0.104, "step": 25500},
    {"epoch": 2.312988479783149, "grad_norm": 1.798727035522461, "learning_rate": 1.654513834974069e-05, "loss": 0.1055, "step": 25600},
    {"epoch": 2.322023943980122, "grad_norm": 1.527917504310608, "learning_rate": 1.6327753796465945e-05, "loss": 0.106, "step": 25700},
    {"epoch": 2.331059408177095, "grad_norm": 1.6333855390548706, "learning_rate": 1.6110369243191205e-05, "loss": 0.1024, "step": 25800},
    {"epoch": 2.340094872374068, "grad_norm": 1.5563682317733765, "learning_rate": 1.589298468991646e-05, "loss": 0.1031, "step": 25900},
    {"epoch": 2.3491303365710414, "grad_norm": 1.6106479167938232, "learning_rate": 1.5675600136641718e-05, "loss": 0.1056, "step": 26000},
    {"epoch": 2.3491303365710414, "eval_loss": 0.09276529401540756, "eval_runtime": 88.7242, "eval_samples_per_second": 48.048, "eval_steps_per_second": 0.755, "eval_wer": 0.10132823291892466, "step": 26000},
    {"epoch": 2.3581658007680146, "grad_norm": 1.8455883264541626, "learning_rate": 1.5458215583366974e-05, "loss": 0.1043, "step": 26100},
    {"epoch": 2.3672012649649874, "grad_norm": 1.7726097106933594, "learning_rate": 1.5240831030092233e-05, "loss": 0.1015, "step": 26200},
    {"epoch": 2.3762367291619606, "grad_norm": 1.6910566091537476, "learning_rate": 1.5023446476817489e-05, "loss": 0.1055, "step": 26300},
    {"epoch": 2.385272193358934, "grad_norm": 1.642712116241455, "learning_rate": 1.4806061923542747e-05, "loss": 0.1027, "step": 26400},
    {"epoch": 2.394307657555907, "grad_norm": 1.6066936254501343, "learning_rate": 1.4588677370268002e-05, "loss": 0.1052, "step": 26500},
    {"epoch": 2.4033431217528802, "grad_norm": 1.7851406335830688, "learning_rate": 1.437129281699326e-05, "loss": 0.1029, "step": 26600},
    {"epoch": 2.412378585949853, "grad_norm": 1.9918655157089233, "learning_rate": 1.4153908263718516e-05, "loss": 0.1006, "step": 26700},
    {"epoch": 2.421414050146826, "grad_norm": 1.6415534019470215, "learning_rate": 1.3936523710443774e-05, "loss": 0.1038, "step": 26800},
    {"epoch": 2.4304495143437994, "grad_norm": 1.9253250360488892, "learning_rate": 1.3719139157169032e-05, "loss": 0.1024, "step": 26900},
    {"epoch": 2.4394849785407726, "grad_norm": 1.86326265335083, "learning_rate": 1.3501754603894287e-05, "loss": 0.1019, "step": 27000},
    {"epoch": 2.4394849785407726, "eval_loss": 0.09212099760770798, "eval_runtime": 88.0292, "eval_samples_per_second": 48.427, "eval_steps_per_second": 0.761, "eval_wer": 0.1000106258633514, "step": 27000},
    {"epoch": 2.448520442737746, "grad_norm": 1.7671024799346924, "learning_rate": 1.3284370050619545e-05, "loss": 0.1026, "step": 27100},
    {"epoch": 2.4575559069347186, "grad_norm": 1.7686715126037598, "learning_rate": 1.3066985497344802e-05, "loss": 0.1041, "step": 27200},
    {"epoch": 2.466591371131692, "grad_norm": 1.743655800819397, "learning_rate": 1.284960094407006e-05, "loss": 0.099, "step": 27300},
    {"epoch": 2.475626835328665, "grad_norm": 1.7912476062774658, "learning_rate": 1.2632216390795314e-05, "loss": 0.1034, "step": 27400},
    {"epoch": 2.484662299525638, "grad_norm": 1.5481427907943726, "learning_rate": 1.2414831837520572e-05, "loss": 0.1037, "step": 27500},
    {"epoch": 2.4936977637226114, "grad_norm": 1.5013809204101562, "learning_rate": 1.219744728424583e-05, "loss": 0.1028, "step": 27600},
    {"epoch": 2.5027332279195846, "grad_norm": 1.592502236366272, "learning_rate": 1.1980062730971087e-05, "loss": 0.1024, "step": 27700},
    {"epoch": 2.5117686921165574, "grad_norm": 1.6279585361480713, "learning_rate": 1.1762678177696345e-05, "loss": 0.1017, "step": 27800},
    {"epoch": 2.5208041563135306, "grad_norm": 1.718693733215332, "learning_rate": 1.15452936244216e-05, "loss": 0.0991, "step": 27900},
    {"epoch": 2.529839620510504, "grad_norm": 1.721211314201355, "learning_rate": 1.1327909071146858e-05, "loss": 0.1004, "step": 28000},
    {"epoch": 2.529839620510504, "eval_loss": 0.0911058560013771, "eval_runtime": 86.9208, "eval_samples_per_second": 49.045, "eval_steps_per_second": 0.771, "eval_wer": 0.09856550844756136, "step": 28000},
    {"epoch": 2.5388750847074766, "grad_norm": 1.708903193473816, "learning_rate": 1.1110524517872116e-05, "loss": 0.1032, "step": 28100},
    {"epoch": 2.54791054890445, "grad_norm": 1.6191095113754272, "learning_rate": 1.0893139964597372e-05, "loss": 0.1031, "step": 28200},
    {"epoch": 2.556946013101423, "grad_norm": 1.5952250957489014, "learning_rate": 1.0677929256855375e-05, "loss": 0.0991, "step": 28300},
    {"epoch": 2.565981477298396, "grad_norm": 1.8054704666137695, "learning_rate": 1.0460544703580633e-05, "loss": 0.0994, "step": 28400},
    {"epoch": 2.5750169414953694, "grad_norm": 1.4976806640625, "learning_rate": 1.024316015030589e-05, "loss": 0.0988, "step": 28500},
    {"epoch": 2.5840524056923426, "grad_norm": 1.6461458206176758, "learning_rate": 1.0025775597031147e-05, "loss": 0.0989, "step": 28600},
    {"epoch": 2.593087869889316, "grad_norm": 1.631536841392517, "learning_rate": 9.808391043756405e-06, "loss": 0.1001, "step": 28700},
    {"epoch": 2.6021233340862886, "grad_norm": 1.8152861595153809, "learning_rate": 9.59100649048166e-06, "loss": 0.1001, "step": 28800},
    {"epoch": 2.611158798283262, "grad_norm": 1.4996885061264038, "learning_rate": 9.373621937206918e-06, "loss": 0.103, "step": 28900},
    {"epoch": 2.620194262480235, "grad_norm": 1.8811280727386475, "learning_rate": 9.156237383932176e-06, "loss": 0.0992, "step": 29000},
    {"epoch": 2.620194262480235, "eval_loss": 0.09040974825620651, "eval_runtime": 87.3549, "eval_samples_per_second": 48.801, "eval_steps_per_second": 0.767, "eval_wer": 0.0979917118265859, "step": 29000},
    {"epoch": 2.629229726677208, "grad_norm": 1.550436019897461, "learning_rate": 8.938852830657433e-06, "loss": 0.0997, "step": 29100},
    {"epoch": 2.638265190874181, "grad_norm": 1.7116386890411377, "learning_rate": 8.721468277382689e-06, "loss": 0.1021, "step": 29200},
    {"epoch": 2.647300655071154, "grad_norm": 1.8250106573104858, "learning_rate": 8.504083724107947e-06, "loss": 0.0992, "step": 29300},
    {"epoch": 2.6563361192681274, "grad_norm": 1.704163670539856, "learning_rate": 8.286699170833203e-06, "loss": 0.0974, "step": 29400},
    {"epoch": 2.6653715834651006, "grad_norm": 1.7405962944030762, "learning_rate": 8.06931461755846e-06, "loss": 0.0997, "step": 29500},
    {"epoch": 2.674407047662074, "grad_norm": 1.599592685699463, "learning_rate": 7.851930064283716e-06, "loss": 0.0978, "step": 29600},
    {"epoch": 2.683442511859047, "grad_norm": 1.666237711906433, "learning_rate": 7.634545511008974e-06, "loss": 0.0986, "step": 29700},
    {"epoch": 2.69247797605602, "grad_norm": 1.6730016469955444, "learning_rate": 7.417160957734231e-06, "loss": 0.0958, "step": 29800},
    {"epoch": 2.701513440252993, "grad_norm": 1.800661325454712, "learning_rate": 7.199776404459488e-06, "loss": 0.0967, "step": 29900},
    {"epoch": 2.710548904449966, "grad_norm": 1.4267141819000244, "learning_rate": 6.982391851184745e-06, "loss": 0.1011, "step": 30000},
    {"epoch": 2.710548904449966, "eval_loss": 0.08978110551834106, "eval_runtime": 92.3004, "eval_samples_per_second": 46.186, "eval_steps_per_second": 0.726, "eval_wer": 0.09784294973966635, "step": 30000},
    {"epoch": 2.719584368646939, "grad_norm": 1.7578014135360718, "learning_rate": 6.765007297910002e-06, "loss": 0.0988, "step": 30100},
    {"epoch": 2.728619832843912, "grad_norm": 1.747879981994629, "learning_rate": 6.54762274463526e-06, "loss": 0.0982, "step": 30200},
    {"epoch": 2.7376552970408854, "grad_norm": 1.4880852699279785, "learning_rate": 6.330238191360516e-06, "loss": 0.0944, "step": 30300},
    {"epoch": 2.7466907612378586, "grad_norm": 1.6102066040039062, "learning_rate": 6.112853638085773e-06, "loss": 0.099, "step": 30400},
    {"epoch": 2.755726225434832, "grad_norm": 2.1802284717559814, "learning_rate": 5.89546908481103e-06, "loss": 0.0963, "step": 30500},
    {"epoch": 2.764761689631805, "grad_norm": 1.65652334690094, "learning_rate": 5.680258377069035e-06, "loss": 0.099, "step": 30600},
    {"epoch": 2.7737971538287782, "grad_norm": 1.344401240348816, "learning_rate": 5.462873823794291e-06, "loss": 0.0979, "step": 30700},
    {"epoch": 2.782832618025751, "grad_norm": 1.6446696519851685, "learning_rate": 5.245489270519548e-06, "loss": 0.0944, "step": 30800},
    {"epoch": 2.791868082222724, "grad_norm": 1.529815435409546, "learning_rate": 5.028104717244806e-06, "loss": 0.0967, "step": 30900},
    {"epoch": 2.8009035464196974, "grad_norm": 1.7729915380477905, "learning_rate": 4.810720163970063e-06, "loss": 0.095, "step": 31000},
    {"epoch": 2.8009035464196974, "eval_loss": 0.08919844031333923, "eval_runtime": 90.4055, "eval_samples_per_second": 47.154, "eval_steps_per_second": 0.741, "eval_wer": 0.09748167038571884, "step": 31000},
    {"epoch": 2.80993901061667, "grad_norm": 1.6226630210876465, "learning_rate": 4.59333561069532e-06, "loss": 0.0982, "step": 31100},
    {"epoch": 2.8189744748136434, "grad_norm": 1.5628806352615356, "learning_rate": 4.375951057420576e-06, "loss": 0.095, "step": 31200},
    {"epoch": 2.8280099390106166, "grad_norm": 1.5284922122955322, "learning_rate": 4.158566504145834e-06, "loss": 0.0945, "step": 31300},
    {"epoch": 2.83704540320759, "grad_norm": 1.9399908781051636, "learning_rate": 3.941181950871091e-06, "loss": 0.0954, "step": 31400},
    {"epoch": 2.846080867404563, "grad_norm": 1.7431321144104004, "learning_rate": 3.7237973975963476e-06, "loss": 0.0973, "step": 31500},
    {"epoch": 2.855116331601536, "grad_norm": 1.4165501594543457, "learning_rate": 3.5064128443216044e-06, "loss": 0.0954, "step": 31600},
    {"epoch": 2.8641517957985094, "grad_norm": 1.8231940269470215, "learning_rate": 3.2890282910468617e-06, "loss": 0.0969, "step": 31700},
    {"epoch": 2.873187259995482, "grad_norm": 1.9092686176300049, "learning_rate": 3.0716437377721185e-06, "loss": 0.0967, "step": 31800},
    {"epoch": 2.8822227241924554, "grad_norm": 1.6101560592651367, "learning_rate": 2.8542591844973753e-06, "loss": 0.0973, "step": 31900},
    {"epoch": 2.8912581883894286, "grad_norm": 1.6077231168746948, "learning_rate": 2.636874631222633e-06, "loss": 0.0975, "step": 32000},
    {"epoch": 2.8912581883894286, "eval_loss": 0.08852633088827133, "eval_runtime": 88.8694, "eval_samples_per_second": 47.969, "eval_steps_per_second": 0.754, "eval_wer": 0.096015301243226, "step": 32000},
    {"epoch": 2.9002936525864014, "grad_norm": 1.6472060680389404, "learning_rate": 2.4194900779478898e-06, "loss": 0.0953, "step": 32100},
    {"epoch": 2.9093291167833746, "grad_norm": 1.5193005800247192, "learning_rate": 2.2021055246731466e-06, "loss": 0.0947, "step": 32200},
    {"epoch": 2.918364580980348, "grad_norm": 1.3484536409378052, "learning_rate": 1.984720971398404e-06, "loss": 0.0937, "step": 32300},
    {"epoch": 2.927400045177321, "grad_norm": 1.6725506782531738, "learning_rate": 1.7673364181236606e-06, "loss": 0.0949, "step": 32400},
    {"epoch": 2.936435509374294, "grad_norm": 1.5670363903045654, "learning_rate": 1.5499518648489175e-06, "loss": 0.0928, "step": 32500},
    {"epoch": 2.9454709735712674, "grad_norm": 1.5655218362808228, "learning_rate": 1.3325673115741747e-06, "loss": 0.0923, "step": 32600},
    {"epoch": 2.95450643776824, "grad_norm": 1.7287861108779907, "learning_rate": 1.1151827582994317e-06, "loss": 0.0945, "step": 32700},
    {"epoch": 2.9635419019652134, "grad_norm": 1.5101486444473267, "learning_rate": 8.977982050246885e-07, "loss": 0.0938, "step": 32800},
    {"epoch": 2.9725773661621866, "grad_norm": 1.4109468460083008, "learning_rate": 6.804136517499457e-07, "loss": 0.091, "step": 32900},
    {"epoch": 2.98161283035916, "grad_norm": 1.537053108215332, "learning_rate": 4.6302909847520263e-07, "loss": 0.0963, "step": 33000},
    {"epoch": 2.98161283035916, "eval_loss": 0.08801376074552536, "eval_runtime": 90.4227, "eval_samples_per_second": 47.145, "eval_steps_per_second": 0.741, "eval_wer": 0.09624907023695675, "step": 33000},
    {"epoch": 2.9906482945561326, "grad_norm": 1.575260043144226, "learning_rate": 2.47818390733207e-07, "loss": 0.0934, "step": 33100},
    {"epoch": 2.9996837587531058, "grad_norm": 1.5032224655151367, "learning_rate": 3.043383745846402e-08, "loss": 0.0941, "step": 33200},
    {"epoch": 2.999774113395076, "step": 33201, "total_flos": 2.756290459511145e+20, "train_loss": 0.20740618139383307, "train_runtime": 51184.2268, "train_samples_per_second": 83.03, "train_steps_per_second": 0.649}
  ],
  "logging_steps": 100,
  "max_steps": 33201,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.756290459511145e+20,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}