whisper-hu-base-finetuned-V2 / trainer_state.json
sarpba's picture
Upload 19 files
b419333 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.999774113395076,
"eval_steps": 1000,
"global_step": 33201,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00903546419697312,
"grad_norm": 7.14754056930542,
"learning_rate": 6.8599999999999995e-06,
"loss": 1.8388,
"step": 100
},
{
"epoch": 0.01807092839394624,
"grad_norm": 6.974137783050537,
"learning_rate": 1.386e-05,
"loss": 1.2229,
"step": 200
},
{
"epoch": 0.02710639259091936,
"grad_norm": 6.240023612976074,
"learning_rate": 2.0859999999999997e-05,
"loss": 0.9997,
"step": 300
},
{
"epoch": 0.03614185678789248,
"grad_norm": 5.526381492614746,
"learning_rate": 2.7859999999999998e-05,
"loss": 0.8603,
"step": 400
},
{
"epoch": 0.0451773209848656,
"grad_norm": 6.0143938064575195,
"learning_rate": 3.4859999999999995e-05,
"loss": 0.7584,
"step": 500
},
{
"epoch": 0.05421278518183872,
"grad_norm": 5.181371212005615,
"learning_rate": 4.1859999999999996e-05,
"loss": 0.6841,
"step": 600
},
{
"epoch": 0.06324824937881184,
"grad_norm": 4.984240531921387,
"learning_rate": 4.885999999999999e-05,
"loss": 0.644,
"step": 700
},
{
"epoch": 0.07228371357578496,
"grad_norm": 5.0428266525268555,
"learning_rate": 5.586e-05,
"loss": 0.6025,
"step": 800
},
{
"epoch": 0.08131917777275807,
"grad_norm": 4.736971378326416,
"learning_rate": 6.285999999999999e-05,
"loss": 0.5776,
"step": 900
},
{
"epoch": 0.0903546419697312,
"grad_norm": 4.559544086456299,
"learning_rate": 6.986e-05,
"loss": 0.551,
"step": 1000
},
{
"epoch": 0.0903546419697312,
"eval_loss": 0.2710006833076477,
"eval_runtime": 89.4841,
"eval_samples_per_second": 47.64,
"eval_steps_per_second": 0.749,
"eval_wer": 0.26942939113802994,
"step": 1000
},
{
"epoch": 0.09939010616670431,
"grad_norm": 4.705749988555908,
"learning_rate": 6.978696313779074e-05,
"loss": 0.5356,
"step": 1100
},
{
"epoch": 0.10842557036367743,
"grad_norm": 4.287839412689209,
"learning_rate": 6.9569578584516e-05,
"loss": 0.5058,
"step": 1200
},
{
"epoch": 0.11746103456065056,
"grad_norm": 3.9484827518463135,
"learning_rate": 6.935219403124125e-05,
"loss": 0.4863,
"step": 1300
},
{
"epoch": 0.12649649875762367,
"grad_norm": 4.207424640655518,
"learning_rate": 6.913480947796651e-05,
"loss": 0.468,
"step": 1400
},
{
"epoch": 0.1355319629545968,
"grad_norm": 4.078378200531006,
"learning_rate": 6.891742492469178e-05,
"loss": 0.4522,
"step": 1500
},
{
"epoch": 0.14456742715156992,
"grad_norm": 3.6946797370910645,
"learning_rate": 6.870004037141703e-05,
"loss": 0.4396,
"step": 1600
},
{
"epoch": 0.15360289134854302,
"grad_norm": 3.742530345916748,
"learning_rate": 6.848265581814229e-05,
"loss": 0.4338,
"step": 1700
},
{
"epoch": 0.16263835554551614,
"grad_norm": 4.0423078536987305,
"learning_rate": 6.826527126486755e-05,
"loss": 0.4232,
"step": 1800
},
{
"epoch": 0.17167381974248927,
"grad_norm": 3.7348833084106445,
"learning_rate": 6.80478867115928e-05,
"loss": 0.4144,
"step": 1900
},
{
"epoch": 0.1807092839394624,
"grad_norm": 3.4496703147888184,
"learning_rate": 6.783050215831805e-05,
"loss": 0.4016,
"step": 2000
},
{
"epoch": 0.1807092839394624,
"eval_loss": 0.20093074440956116,
"eval_runtime": 89.3016,
"eval_samples_per_second": 47.737,
"eval_steps_per_second": 0.75,
"eval_wer": 0.20614174901710763,
"step": 2000
},
{
"epoch": 0.18974474813643552,
"grad_norm": 3.3866732120513916,
"learning_rate": 6.761311760504332e-05,
"loss": 0.3858,
"step": 2100
},
{
"epoch": 0.19878021233340862,
"grad_norm": 4.071012496948242,
"learning_rate": 6.739573305176857e-05,
"loss": 0.3875,
"step": 2200
},
{
"epoch": 0.20781567653038174,
"grad_norm": 3.373796224594116,
"learning_rate": 6.717834849849383e-05,
"loss": 0.3795,
"step": 2300
},
{
"epoch": 0.21685114072735487,
"grad_norm": 3.105025291442871,
"learning_rate": 6.696096394521908e-05,
"loss": 0.3787,
"step": 2400
},
{
"epoch": 0.225886604924328,
"grad_norm": 3.8723206520080566,
"learning_rate": 6.674357939194434e-05,
"loss": 0.3716,
"step": 2500
},
{
"epoch": 0.23492206912130112,
"grad_norm": 3.2043449878692627,
"learning_rate": 6.65261948386696e-05,
"loss": 0.3662,
"step": 2600
},
{
"epoch": 0.24395753331827422,
"grad_norm": 3.2647688388824463,
"learning_rate": 6.631098413092761e-05,
"loss": 0.3567,
"step": 2700
},
{
"epoch": 0.25299299751524734,
"grad_norm": 3.255851984024048,
"learning_rate": 6.609359957765287e-05,
"loss": 0.3541,
"step": 2800
},
{
"epoch": 0.26202846171222044,
"grad_norm": 3.103607177734375,
"learning_rate": 6.587621502437812e-05,
"loss": 0.3551,
"step": 2900
},
{
"epoch": 0.2710639259091936,
"grad_norm": 3.7592177391052246,
"learning_rate": 6.565883047110337e-05,
"loss": 0.3449,
"step": 3000
},
{
"epoch": 0.2710639259091936,
"eval_loss": 0.17070473730564117,
"eval_runtime": 88.3474,
"eval_samples_per_second": 48.253,
"eval_steps_per_second": 0.758,
"eval_wer": 0.17702688343427903,
"step": 3000
},
{
"epoch": 0.2800993901061667,
"grad_norm": 2.7764692306518555,
"learning_rate": 6.544144591782863e-05,
"loss": 0.3477,
"step": 3100
},
{
"epoch": 0.28913485430313984,
"grad_norm": 2.980421543121338,
"learning_rate": 6.522406136455388e-05,
"loss": 0.3367,
"step": 3200
},
{
"epoch": 0.29817031850011294,
"grad_norm": 3.0955636501312256,
"learning_rate": 6.500667681127915e-05,
"loss": 0.3347,
"step": 3300
},
{
"epoch": 0.30720578269708604,
"grad_norm": 2.942781925201416,
"learning_rate": 6.47892922580044e-05,
"loss": 0.3363,
"step": 3400
},
{
"epoch": 0.3162412468940592,
"grad_norm": 2.7990803718566895,
"learning_rate": 6.457190770472966e-05,
"loss": 0.3324,
"step": 3500
},
{
"epoch": 0.3252767110910323,
"grad_norm": 3.0384480953216553,
"learning_rate": 6.435452315145492e-05,
"loss": 0.3273,
"step": 3600
},
{
"epoch": 0.33431217528800544,
"grad_norm": 2.8415443897247314,
"learning_rate": 6.413713859818017e-05,
"loss": 0.3231,
"step": 3700
},
{
"epoch": 0.34334763948497854,
"grad_norm": 2.706265687942505,
"learning_rate": 6.391975404490544e-05,
"loss": 0.3224,
"step": 3800
},
{
"epoch": 0.35238310368195164,
"grad_norm": 2.77278995513916,
"learning_rate": 6.370236949163069e-05,
"loss": 0.32,
"step": 3900
},
{
"epoch": 0.3614185678789248,
"grad_norm": 2.9242990016937256,
"learning_rate": 6.348498493835595e-05,
"loss": 0.3147,
"step": 4000
},
{
"epoch": 0.3614185678789248,
"eval_loss": 0.1588164120912552,
"eval_runtime": 89.3911,
"eval_samples_per_second": 47.689,
"eval_steps_per_second": 0.75,
"eval_wer": 0.1649984061204973,
"step": 4000
},
{
"epoch": 0.3704540320758979,
"grad_norm": 3.196282148361206,
"learning_rate": 6.32676003850812e-05,
"loss": 0.3112,
"step": 4100
},
{
"epoch": 0.37948949627287104,
"grad_norm": 3.880776882171631,
"learning_rate": 6.305021583180646e-05,
"loss": 0.3154,
"step": 4200
},
{
"epoch": 0.38852496046984414,
"grad_norm": 2.7569668292999268,
"learning_rate": 6.283283127853171e-05,
"loss": 0.3108,
"step": 4300
},
{
"epoch": 0.39756042466681724,
"grad_norm": 2.951040267944336,
"learning_rate": 6.261544672525697e-05,
"loss": 0.3093,
"step": 4400
},
{
"epoch": 0.4065958888637904,
"grad_norm": 2.667750358581543,
"learning_rate": 6.239806217198222e-05,
"loss": 0.3082,
"step": 4500
},
{
"epoch": 0.4156313530607635,
"grad_norm": 2.872540235519409,
"learning_rate": 6.218067761870749e-05,
"loss": 0.3005,
"step": 4600
},
{
"epoch": 0.42466681725773664,
"grad_norm": 3.15378999710083,
"learning_rate": 6.196329306543275e-05,
"loss": 0.2994,
"step": 4700
},
{
"epoch": 0.43370228145470974,
"grad_norm": 2.879260301589966,
"learning_rate": 6.1745908512158e-05,
"loss": 0.2959,
"step": 4800
},
{
"epoch": 0.44273774565168283,
"grad_norm": 2.811612367630005,
"learning_rate": 6.152852395888326e-05,
"loss": 0.2974,
"step": 4900
},
{
"epoch": 0.451773209848656,
"grad_norm": 2.7307889461517334,
"learning_rate": 6.131113940560851e-05,
"loss": 0.2936,
"step": 5000
},
{
"epoch": 0.451773209848656,
"eval_loss": 0.1471971571445465,
"eval_runtime": 88.7501,
"eval_samples_per_second": 48.034,
"eval_steps_per_second": 0.755,
"eval_wer": 0.1551376049304006,
"step": 5000
},
{
"epoch": 0.4608086740456291,
"grad_norm": 2.734050750732422,
"learning_rate": 6.109375485233378e-05,
"loss": 0.2917,
"step": 5100
},
{
"epoch": 0.46984413824260224,
"grad_norm": 2.650491952896118,
"learning_rate": 6.0876370299059026e-05,
"loss": 0.2929,
"step": 5200
},
{
"epoch": 0.47887960243957534,
"grad_norm": 2.519413709640503,
"learning_rate": 6.065898574578429e-05,
"loss": 0.2919,
"step": 5300
},
{
"epoch": 0.48791506663654843,
"grad_norm": 2.6014676094055176,
"learning_rate": 6.0441601192509545e-05,
"loss": 0.2811,
"step": 5400
},
{
"epoch": 0.4969505308335216,
"grad_norm": 2.7325778007507324,
"learning_rate": 6.02242166392348e-05,
"loss": 0.2878,
"step": 5500
},
{
"epoch": 0.5059859950304947,
"grad_norm": 2.636491298675537,
"learning_rate": 6.000683208596006e-05,
"loss": 0.2821,
"step": 5600
},
{
"epoch": 0.5150214592274678,
"grad_norm": 2.6922860145568848,
"learning_rate": 5.9789447532685315e-05,
"loss": 0.2828,
"step": 5700
},
{
"epoch": 0.5240569234244409,
"grad_norm": 2.4657480716705322,
"learning_rate": 5.957206297941057e-05,
"loss": 0.2845,
"step": 5800
},
{
"epoch": 0.5330923876214141,
"grad_norm": 2.6574530601501465,
"learning_rate": 5.935467842613583e-05,
"loss": 0.28,
"step": 5900
},
{
"epoch": 0.5421278518183872,
"grad_norm": 2.769786834716797,
"learning_rate": 5.913729387286109e-05,
"loss": 0.2758,
"step": 6000
},
{
"epoch": 0.5421278518183872,
"eval_loss": 0.1405603438615799,
"eval_runtime": 90.2531,
"eval_samples_per_second": 47.234,
"eval_steps_per_second": 0.742,
"eval_wer": 0.14793326957815323,
"step": 6000
},
{
"epoch": 0.5511633160153603,
"grad_norm": 2.6292548179626465,
"learning_rate": 5.891990931958634e-05,
"loss": 0.2744,
"step": 6100
},
{
"epoch": 0.5601987802123334,
"grad_norm": 2.536770820617676,
"learning_rate": 5.87025247663116e-05,
"loss": 0.2735,
"step": 6200
},
{
"epoch": 0.5692342444093065,
"grad_norm": 2.3336434364318848,
"learning_rate": 5.848514021303685e-05,
"loss": 0.2764,
"step": 6300
},
{
"epoch": 0.5782697086062797,
"grad_norm": 2.677401542663574,
"learning_rate": 5.8267755659762116e-05,
"loss": 0.2761,
"step": 6400
},
{
"epoch": 0.5873051728032528,
"grad_norm": 2.634038209915161,
"learning_rate": 5.805037110648737e-05,
"loss": 0.2694,
"step": 6500
},
{
"epoch": 0.5963406370002259,
"grad_norm": 2.643404245376587,
"learning_rate": 5.783298655321262e-05,
"loss": 0.263,
"step": 6600
},
{
"epoch": 0.605376101197199,
"grad_norm": 2.2921056747436523,
"learning_rate": 5.7615601999937885e-05,
"loss": 0.2738,
"step": 6700
},
{
"epoch": 0.6144115653941721,
"grad_norm": 2.398670196533203,
"learning_rate": 5.739821744666314e-05,
"loss": 0.2682,
"step": 6800
},
{
"epoch": 0.6234470295911453,
"grad_norm": 2.447571277618408,
"learning_rate": 5.71808328933884e-05,
"loss": 0.2653,
"step": 6900
},
{
"epoch": 0.6324824937881184,
"grad_norm": 2.270413637161255,
"learning_rate": 5.6963448340113654e-05,
"loss": 0.2663,
"step": 7000
},
{
"epoch": 0.6324824937881184,
"eval_loss": 0.13218513131141663,
"eval_runtime": 89.0739,
"eval_samples_per_second": 47.859,
"eval_steps_per_second": 0.752,
"eval_wer": 0.13926256508341303,
"step": 7000
},
{
"epoch": 0.6415179579850915,
"grad_norm": 2.406534433364868,
"learning_rate": 5.674606378683892e-05,
"loss": 0.2701,
"step": 7100
},
{
"epoch": 0.6505534221820646,
"grad_norm": 2.3954741954803467,
"learning_rate": 5.652867923356417e-05,
"loss": 0.2661,
"step": 7200
},
{
"epoch": 0.6595888863790377,
"grad_norm": 2.3920400142669678,
"learning_rate": 5.631129468028943e-05,
"loss": 0.2662,
"step": 7300
},
{
"epoch": 0.6686243505760109,
"grad_norm": 2.6168298721313477,
"learning_rate": 5.6096083972547435e-05,
"loss": 0.259,
"step": 7400
},
{
"epoch": 0.677659814772984,
"grad_norm": 2.351517915725708,
"learning_rate": 5.587869941927269e-05,
"loss": 0.2531,
"step": 7500
},
{
"epoch": 0.6866952789699571,
"grad_norm": 2.4925589561462402,
"learning_rate": 5.566131486599794e-05,
"loss": 0.2584,
"step": 7600
},
{
"epoch": 0.6957307431669302,
"grad_norm": 2.465437650680542,
"learning_rate": 5.5443930312723204e-05,
"loss": 0.2572,
"step": 7700
},
{
"epoch": 0.7047662073639033,
"grad_norm": 2.383103370666504,
"learning_rate": 5.522654575944846e-05,
"loss": 0.2541,
"step": 7800
},
{
"epoch": 0.7138016715608765,
"grad_norm": 2.254746675491333,
"learning_rate": 5.5009161206173716e-05,
"loss": 0.2551,
"step": 7900
},
{
"epoch": 0.7228371357578496,
"grad_norm": 2.601073980331421,
"learning_rate": 5.479177665289897e-05,
"loss": 0.2613,
"step": 8000
},
{
"epoch": 0.7228371357578496,
"eval_loss": 0.1282639354467392,
"eval_runtime": 89.4564,
"eval_samples_per_second": 47.654,
"eval_steps_per_second": 0.749,
"eval_wer": 0.1401763893316332,
"step": 8000
},
{
"epoch": 0.7318725999548227,
"grad_norm": 2.6043508052825928,
"learning_rate": 5.4574392099624236e-05,
"loss": 0.2527,
"step": 8100
},
{
"epoch": 0.7409080641517958,
"grad_norm": 2.4817826747894287,
"learning_rate": 5.4357007546349486e-05,
"loss": 0.2531,
"step": 8200
},
{
"epoch": 0.7499435283487689,
"grad_norm": 2.2043120861053467,
"learning_rate": 5.413962299307475e-05,
"loss": 0.2508,
"step": 8300
},
{
"epoch": 0.7589789925457421,
"grad_norm": 2.436621904373169,
"learning_rate": 5.39222384398e-05,
"loss": 0.2524,
"step": 8400
},
{
"epoch": 0.7680144567427152,
"grad_norm": 2.2948272228240967,
"learning_rate": 5.3704853886525255e-05,
"loss": 0.2511,
"step": 8500
},
{
"epoch": 0.7770499209396883,
"grad_norm": 2.516068935394287,
"learning_rate": 5.348746933325052e-05,
"loss": 0.2503,
"step": 8600
},
{
"epoch": 0.7860853851366614,
"grad_norm": 2.286062002182007,
"learning_rate": 5.327008477997577e-05,
"loss": 0.249,
"step": 8700
},
{
"epoch": 0.7951208493336345,
"grad_norm": 2.2099480628967285,
"learning_rate": 5.305270022670103e-05,
"loss": 0.2476,
"step": 8800
},
{
"epoch": 0.8041563135306077,
"grad_norm": 2.279094934463501,
"learning_rate": 5.283531567342629e-05,
"loss": 0.2477,
"step": 8900
},
{
"epoch": 0.8131917777275808,
"grad_norm": 2.5608932971954346,
"learning_rate": 5.2617931120151544e-05,
"loss": 0.2491,
"step": 9000
},
{
"epoch": 0.8131917777275808,
"eval_loss": 0.12159302085638046,
"eval_runtime": 88.1859,
"eval_samples_per_second": 48.341,
"eval_steps_per_second": 0.76,
"eval_wer": 0.1319094676442461,
"step": 9000
},
{
"epoch": 0.8222272419245539,
"grad_norm": 2.8134467601776123,
"learning_rate": 5.24005465668768e-05,
"loss": 0.2393,
"step": 9100
},
{
"epoch": 0.831262706121527,
"grad_norm": 2.109177589416504,
"learning_rate": 5.218316201360206e-05,
"loss": 0.247,
"step": 9200
},
{
"epoch": 0.8402981703185001,
"grad_norm": 2.333599090576172,
"learning_rate": 5.196577746032731e-05,
"loss": 0.2396,
"step": 9300
},
{
"epoch": 0.8493336345154733,
"grad_norm": 2.263291120529175,
"learning_rate": 5.174839290705257e-05,
"loss": 0.2454,
"step": 9400
},
{
"epoch": 0.8583690987124464,
"grad_norm": 2.1932239532470703,
"learning_rate": 5.153100835377783e-05,
"loss": 0.2441,
"step": 9500
},
{
"epoch": 0.8674045629094195,
"grad_norm": 2.3545312881469727,
"learning_rate": 5.131362380050308e-05,
"loss": 0.2388,
"step": 9600
},
{
"epoch": 0.8764400271063926,
"grad_norm": 1.9302074909210205,
"learning_rate": 5.1096239247228345e-05,
"loss": 0.2386,
"step": 9700
},
{
"epoch": 0.8854754913033657,
"grad_norm": 2.2227907180786133,
"learning_rate": 5.0878854693953595e-05,
"loss": 0.245,
"step": 9800
},
{
"epoch": 0.8945109555003389,
"grad_norm": 2.0656354427337646,
"learning_rate": 5.066147014067886e-05,
"loss": 0.2341,
"step": 9900
},
{
"epoch": 0.903546419697312,
"grad_norm": 2.062394142150879,
"learning_rate": 5.0444085587404114e-05,
"loss": 0.238,
"step": 10000
},
{
"epoch": 0.903546419697312,
"eval_loss": 0.11923061311244965,
"eval_runtime": 88.4115,
"eval_samples_per_second": 48.218,
"eval_steps_per_second": 0.758,
"eval_wer": 0.1290829879927744,
"step": 10000
},
{
"epoch": 0.9125818838942851,
"grad_norm": 2.264702081680298,
"learning_rate": 5.022670103412938e-05,
"loss": 0.2386,
"step": 10100
},
{
"epoch": 0.9216173480912582,
"grad_norm": 2.0281338691711426,
"learning_rate": 5.000931648085463e-05,
"loss": 0.2374,
"step": 10200
},
{
"epoch": 0.9306528122882313,
"grad_norm": 2.0940310955047607,
"learning_rate": 4.9791931927579883e-05,
"loss": 0.2349,
"step": 10300
},
{
"epoch": 0.9396882764852045,
"grad_norm": 2.1335864067077637,
"learning_rate": 4.957454737430514e-05,
"loss": 0.2326,
"step": 10400
},
{
"epoch": 0.9487237406821776,
"grad_norm": 2.3644163608551025,
"learning_rate": 4.9357162821030396e-05,
"loss": 0.2314,
"step": 10500
},
{
"epoch": 0.9577592048791507,
"grad_norm": 2.029175043106079,
"learning_rate": 4.91419521132884e-05,
"loss": 0.2363,
"step": 10600
},
{
"epoch": 0.9667946690761238,
"grad_norm": 2.630101203918457,
"learning_rate": 4.8924567560013664e-05,
"loss": 0.2298,
"step": 10700
},
{
"epoch": 0.9758301332730969,
"grad_norm": 2.356724500656128,
"learning_rate": 4.870718300673891e-05,
"loss": 0.2269,
"step": 10800
},
{
"epoch": 0.9848655974700701,
"grad_norm": 2.1543145179748535,
"learning_rate": 4.8489798453464176e-05,
"loss": 0.2377,
"step": 10900
},
{
"epoch": 0.9939010616670432,
"grad_norm": 2.399824857711792,
"learning_rate": 4.827241390018943e-05,
"loss": 0.2287,
"step": 11000
},
{
"epoch": 0.9939010616670432,
"eval_loss": 0.11506820470094681,
"eval_runtime": 89.5431,
"eval_samples_per_second": 47.608,
"eval_steps_per_second": 0.748,
"eval_wer": 0.1275528636701732,
"step": 11000
},
{
"epoch": 1.0028913485430313,
"grad_norm": 2.18354868888855,
"learning_rate": 4.805502934691468e-05,
"loss": 0.2129,
"step": 11100
},
{
"epoch": 1.0119268127400045,
"grad_norm": 2.018084764480591,
"learning_rate": 4.7837644793639945e-05,
"loss": 0.1792,
"step": 11200
},
{
"epoch": 1.0209622769369777,
"grad_norm": 2.1397042274475098,
"learning_rate": 4.76202602403652e-05,
"loss": 0.1794,
"step": 11300
},
{
"epoch": 1.0299977411339507,
"grad_norm": 1.925986886024475,
"learning_rate": 4.740287568709046e-05,
"loss": 0.1816,
"step": 11400
},
{
"epoch": 1.039033205330924,
"grad_norm": 2.0704362392425537,
"learning_rate": 4.7185491133815715e-05,
"loss": 0.1767,
"step": 11500
},
{
"epoch": 1.048068669527897,
"grad_norm": 1.8338583707809448,
"learning_rate": 4.696810658054098e-05,
"loss": 0.1767,
"step": 11600
},
{
"epoch": 1.05710413372487,
"grad_norm": 1.9655053615570068,
"learning_rate": 4.675072202726623e-05,
"loss": 0.1814,
"step": 11700
},
{
"epoch": 1.0661395979218433,
"grad_norm": 1.880100965499878,
"learning_rate": 4.653333747399149e-05,
"loss": 0.1786,
"step": 11800
},
{
"epoch": 1.0751750621188163,
"grad_norm": 2.52089524269104,
"learning_rate": 4.631595292071674e-05,
"loss": 0.1796,
"step": 11900
},
{
"epoch": 1.0842105263157895,
"grad_norm": 2.179574728012085,
"learning_rate": 4.6098568367441997e-05,
"loss": 0.1798,
"step": 12000
},
{
"epoch": 1.0842105263157895,
"eval_loss": 0.11312589794397354,
"eval_runtime": 89.9356,
"eval_samples_per_second": 47.401,
"eval_steps_per_second": 0.745,
"eval_wer": 0.12343002868983105,
"step": 12000
},
{
"epoch": 1.0932459905127625,
"grad_norm": 2.3577959537506104,
"learning_rate": 4.588118381416726e-05,
"loss": 0.1834,
"step": 12100
},
{
"epoch": 1.1022814547097357,
"grad_norm": 2.2626988887786865,
"learning_rate": 4.566379926089251e-05,
"loss": 0.1792,
"step": 12200
},
{
"epoch": 1.111316918906709,
"grad_norm": 2.0373926162719727,
"learning_rate": 4.544641470761777e-05,
"loss": 0.1773,
"step": 12300
},
{
"epoch": 1.120352383103682,
"grad_norm": 1.8774733543395996,
"learning_rate": 4.522903015434303e-05,
"loss": 0.1763,
"step": 12400
},
{
"epoch": 1.1293878473006551,
"grad_norm": 2.0867061614990234,
"learning_rate": 4.5011645601068285e-05,
"loss": 0.1775,
"step": 12500
},
{
"epoch": 1.138423311497628,
"grad_norm": 1.822313904762268,
"learning_rate": 4.479426104779354e-05,
"loss": 0.182,
"step": 12600
},
{
"epoch": 1.1474587756946013,
"grad_norm": 1.9483801126480103,
"learning_rate": 4.4579050340051546e-05,
"loss": 0.1801,
"step": 12700
},
{
"epoch": 1.1564942398915745,
"grad_norm": 1.7819561958312988,
"learning_rate": 4.436166578677681e-05,
"loss": 0.175,
"step": 12800
},
{
"epoch": 1.1655297040885475,
"grad_norm": 2.2512149810791016,
"learning_rate": 4.414428123350206e-05,
"loss": 0.1771,
"step": 12900
},
{
"epoch": 1.1745651682855207,
"grad_norm": 2.0755016803741455,
"learning_rate": 4.3926896680227315e-05,
"loss": 0.1791,
"step": 13000
},
{
"epoch": 1.1745651682855207,
"eval_loss": 0.1113397553563118,
"eval_runtime": 89.8896,
"eval_samples_per_second": 47.425,
"eval_steps_per_second": 0.745,
"eval_wer": 0.11858463500159389,
"step": 13000
},
{
"epoch": 1.1836006324824937,
"grad_norm": 1.8246344327926636,
"learning_rate": 4.370951212695258e-05,
"loss": 0.1826,
"step": 13100
},
{
"epoch": 1.192636096679467,
"grad_norm": 2.0341689586639404,
"learning_rate": 4.349212757367783e-05,
"loss": 0.1795,
"step": 13200
},
{
"epoch": 1.2016715608764401,
"grad_norm": 1.8964906930923462,
"learning_rate": 4.327474302040309e-05,
"loss": 0.1777,
"step": 13300
},
{
"epoch": 1.210707025073413,
"grad_norm": 1.9983662366867065,
"learning_rate": 4.305735846712835e-05,
"loss": 0.1777,
"step": 13400
},
{
"epoch": 1.2197424892703863,
"grad_norm": 1.9901524782180786,
"learning_rate": 4.2839973913853604e-05,
"loss": 0.1745,
"step": 13500
},
{
"epoch": 1.2287779534673593,
"grad_norm": 2.0231523513793945,
"learning_rate": 4.262258936057886e-05,
"loss": 0.183,
"step": 13600
},
{
"epoch": 1.2378134176643325,
"grad_norm": 2.097205877304077,
"learning_rate": 4.240520480730412e-05,
"loss": 0.1795,
"step": 13700
},
{
"epoch": 1.2468488818613057,
"grad_norm": 1.8367393016815186,
"learning_rate": 4.218782025402937e-05,
"loss": 0.1746,
"step": 13800
},
{
"epoch": 1.2558843460582787,
"grad_norm": 2.2997806072235107,
"learning_rate": 4.197043570075463e-05,
"loss": 0.1781,
"step": 13900
},
{
"epoch": 1.264919810255252,
"grad_norm": 1.9972946643829346,
"learning_rate": 4.1753051147479886e-05,
"loss": 0.1787,
"step": 14000
},
{
"epoch": 1.264919810255252,
"eval_loss": 0.10852447897195816,
"eval_runtime": 88.2121,
"eval_samples_per_second": 48.327,
"eval_steps_per_second": 0.76,
"eval_wer": 0.11862713845499948,
"step": 14000
},
{
"epoch": 1.273955274452225,
"grad_norm": 1.9734628200531006,
"learning_rate": 4.153566659420514e-05,
"loss": 0.178,
"step": 14100
},
{
"epoch": 1.282990738649198,
"grad_norm": 2.0544159412384033,
"learning_rate": 4.1318282040930405e-05,
"loss": 0.1704,
"step": 14200
},
{
"epoch": 1.2920262028461713,
"grad_norm": 1.8968679904937744,
"learning_rate": 4.1100897487655655e-05,
"loss": 0.1772,
"step": 14300
},
{
"epoch": 1.3010616670431443,
"grad_norm": 1.8103258609771729,
"learning_rate": 4.088351293438092e-05,
"loss": 0.179,
"step": 14400
},
{
"epoch": 1.3100971312401175,
"grad_norm": 1.9365414381027222,
"learning_rate": 4.0666128381106174e-05,
"loss": 0.1775,
"step": 14500
},
{
"epoch": 1.3191325954370905,
"grad_norm": 1.9121586084365845,
"learning_rate": 4.044874382783143e-05,
"loss": 0.1772,
"step": 14600
},
{
"epoch": 1.3281680596340637,
"grad_norm": 2.0764715671539307,
"learning_rate": 4.023135927455669e-05,
"loss": 0.1719,
"step": 14700
},
{
"epoch": 1.337203523831037,
"grad_norm": 1.9687429666519165,
"learning_rate": 4.0013974721281944e-05,
"loss": 0.1735,
"step": 14800
},
{
"epoch": 1.34623898802801,
"grad_norm": 2.0690395832061768,
"learning_rate": 3.97965901680072e-05,
"loss": 0.1797,
"step": 14900
},
{
"epoch": 1.355274452224983,
"grad_norm": 2.121548891067505,
"learning_rate": 3.9579205614732456e-05,
"loss": 0.1771,
"step": 15000
},
{
"epoch": 1.355274452224983,
"eval_loss": 0.10677234828472137,
"eval_runtime": 88.6946,
"eval_samples_per_second": 48.064,
"eval_steps_per_second": 0.755,
"eval_wer": 0.11541812772287749,
"step": 15000
},
{
"epoch": 1.364309916421956,
"grad_norm": 2.3323662281036377,
"learning_rate": 3.936182106145772e-05,
"loss": 0.173,
"step": 15100
},
{
"epoch": 1.3733453806189293,
"grad_norm": 2.262308359146118,
"learning_rate": 3.914443650818297e-05,
"loss": 0.1723,
"step": 15200
},
{
"epoch": 1.3823808448159025,
"grad_norm": 2.0854151248931885,
"learning_rate": 3.892705195490823e-05,
"loss": 0.1753,
"step": 15300
},
{
"epoch": 1.3914163090128755,
"grad_norm": 2.0246262550354004,
"learning_rate": 3.870966740163348e-05,
"loss": 0.1742,
"step": 15400
},
{
"epoch": 1.4004517732098487,
"grad_norm": 2.0298593044281006,
"learning_rate": 3.8492282848358745e-05,
"loss": 0.1727,
"step": 15500
},
{
"epoch": 1.4094872374068217,
"grad_norm": 1.8497194051742554,
"learning_rate": 3.8274898295084e-05,
"loss": 0.1738,
"step": 15600
},
{
"epoch": 1.418522701603795,
"grad_norm": 2.052497386932373,
"learning_rate": 3.805751374180925e-05,
"loss": 0.1719,
"step": 15700
},
{
"epoch": 1.427558165800768,
"grad_norm": 1.948426604270935,
"learning_rate": 3.7840129188534514e-05,
"loss": 0.1692,
"step": 15800
},
{
"epoch": 1.436593629997741,
"grad_norm": 2.078310012817383,
"learning_rate": 3.762274463525977e-05,
"loss": 0.1736,
"step": 15900
},
{
"epoch": 1.4456290941947143,
"grad_norm": 1.8413662910461426,
"learning_rate": 3.740536008198503e-05,
"loss": 0.1728,
"step": 16000
},
{
"epoch": 1.4456290941947143,
"eval_loss": 0.10456942021846771,
"eval_runtime": 88.873,
"eval_samples_per_second": 47.967,
"eval_steps_per_second": 0.754,
"eval_wer": 0.11354797577303156,
"step": 16000
},
{
"epoch": 1.4546645583916873,
"grad_norm": 1.894006371498108,
"learning_rate": 3.7187975528710283e-05,
"loss": 0.1737,
"step": 16100
},
{
"epoch": 1.4637000225886605,
"grad_norm": 2.0090203285217285,
"learning_rate": 3.6970590975435547e-05,
"loss": 0.1723,
"step": 16200
},
{
"epoch": 1.4727354867856337,
"grad_norm": 1.896735668182373,
"learning_rate": 3.6753206422160796e-05,
"loss": 0.1744,
"step": 16300
},
{
"epoch": 1.4817709509826067,
"grad_norm": 1.9422425031661987,
"learning_rate": 3.653582186888606e-05,
"loss": 0.1662,
"step": 16400
},
{
"epoch": 1.49080641517958,
"grad_norm": 2.205997943878174,
"learning_rate": 3.6318437315611316e-05,
"loss": 0.1726,
"step": 16500
},
{
"epoch": 1.4998418793765529,
"grad_norm": 2.2248659133911133,
"learning_rate": 3.6101052762336565e-05,
"loss": 0.1739,
"step": 16600
},
{
"epoch": 1.508877343573526,
"grad_norm": 1.9154504537582397,
"learning_rate": 3.588366820906183e-05,
"loss": 0.1751,
"step": 16700
},
{
"epoch": 1.5179128077704993,
"grad_norm": 3.7510364055633545,
"learning_rate": 3.566845750131983e-05,
"loss": 0.1691,
"step": 16800
},
{
"epoch": 1.5269482719674723,
"grad_norm": 1.9326035976409912,
"learning_rate": 3.545107294804509e-05,
"loss": 0.1736,
"step": 16900
},
{
"epoch": 1.5359837361644455,
"grad_norm": 2.1534535884857178,
"learning_rate": 3.5233688394770345e-05,
"loss": 0.1714,
"step": 17000
},
{
"epoch": 1.5359837361644455,
"eval_loss": 0.10288450121879578,
"eval_runtime": 88.7852,
"eval_samples_per_second": 48.015,
"eval_steps_per_second": 0.755,
"eval_wer": 0.11522686218255233,
"step": 17000
},
{
"epoch": 1.5450192003614185,
"grad_norm": 2.0503385066986084,
"learning_rate": 3.50163038414956e-05,
"loss": 0.1697,
"step": 17100
},
{
"epoch": 1.5540546645583917,
"grad_norm": 2.1852426528930664,
"learning_rate": 3.479891928822086e-05,
"loss": 0.1687,
"step": 17200
},
{
"epoch": 1.563090128755365,
"grad_norm": 1.9237619638442993,
"learning_rate": 3.4581534734946115e-05,
"loss": 0.1699,
"step": 17300
},
{
"epoch": 1.572125592952338,
"grad_norm": 1.9139324426651,
"learning_rate": 3.436415018167137e-05,
"loss": 0.1721,
"step": 17400
},
{
"epoch": 1.581161057149311,
"grad_norm": 1.8762294054031372,
"learning_rate": 3.414676562839663e-05,
"loss": 0.1682,
"step": 17500
},
{
"epoch": 1.590196521346284,
"grad_norm": 1.6753225326538086,
"learning_rate": 3.392938107512189e-05,
"loss": 0.1648,
"step": 17600
},
{
"epoch": 1.5992319855432573,
"grad_norm": 2.4316673278808594,
"learning_rate": 3.371199652184715e-05,
"loss": 0.1701,
"step": 17700
},
{
"epoch": 1.6082674497402305,
"grad_norm": 1.9219187498092651,
"learning_rate": 3.34946119685724e-05,
"loss": 0.1669,
"step": 17800
},
{
"epoch": 1.6173029139372035,
"grad_norm": 1.6715503931045532,
"learning_rate": 3.327722741529766e-05,
"loss": 0.1675,
"step": 17900
},
{
"epoch": 1.6263383781341767,
"grad_norm": 1.9405934810638428,
"learning_rate": 3.3059842862022916e-05,
"loss": 0.1706,
"step": 18000
},
{
"epoch": 1.6263383781341767,
"eval_loss": 0.10067987442016602,
"eval_runtime": 89.3754,
"eval_samples_per_second": 47.698,
"eval_steps_per_second": 0.75,
"eval_wer": 0.11174157900329401,
"step": 18000
},
{
"epoch": 1.6353738423311497,
"grad_norm": 2.1481971740722656,
"learning_rate": 3.284245830874817e-05,
"loss": 0.1668,
"step": 18100
},
{
"epoch": 1.644409306528123,
"grad_norm": 2.29831600189209,
"learning_rate": 3.262507375547343e-05,
"loss": 0.1683,
"step": 18200
},
{
"epoch": 1.653444770725096,
"grad_norm": 1.698500633239746,
"learning_rate": 3.2407689202198685e-05,
"loss": 0.1651,
"step": 18300
},
{
"epoch": 1.662480234922069,
"grad_norm": 2.0010197162628174,
"learning_rate": 3.219030464892394e-05,
"loss": 0.1647,
"step": 18400
},
{
"epoch": 1.671515699119042,
"grad_norm": 1.8577830791473389,
"learning_rate": 3.19729200956492e-05,
"loss": 0.1649,
"step": 18500
},
{
"epoch": 1.6805511633160153,
"grad_norm": 2.0325686931610107,
"learning_rate": 3.175553554237446e-05,
"loss": 0.1664,
"step": 18600
},
{
"epoch": 1.6895866275129885,
"grad_norm": 1.8574236631393433,
"learning_rate": 3.153815098909972e-05,
"loss": 0.1646,
"step": 18700
},
{
"epoch": 1.6986220917099617,
"grad_norm": 1.94573175907135,
"learning_rate": 3.1320766435824974e-05,
"loss": 0.1623,
"step": 18800
},
{
"epoch": 1.7076575559069347,
"grad_norm": 1.9908078908920288,
"learning_rate": 3.1103381882550224e-05,
"loss": 0.1632,
"step": 18900
},
{
"epoch": 1.7166930201039077,
"grad_norm": 1.7018805742263794,
"learning_rate": 3.088599732927549e-05,
"loss": 0.163,
"step": 19000
},
{
"epoch": 1.7166930201039077,
"eval_loss": 0.09983944892883301,
"eval_runtime": 88.5039,
"eval_samples_per_second": 48.167,
"eval_steps_per_second": 0.757,
"eval_wer": 0.10740622675592391,
"step": 19000
},
{
"epoch": 1.7257284843008809,
"grad_norm": 1.8709958791732788,
"learning_rate": 3.066861277600074e-05,
"loss": 0.163,
"step": 19100
},
{
"epoch": 1.734763948497854,
"grad_norm": 2.1051034927368164,
"learning_rate": 3.0451228222726e-05,
"loss": 0.1632,
"step": 19200
},
{
"epoch": 1.7437994126948273,
"grad_norm": 2.1160008907318115,
"learning_rate": 3.0233843669451256e-05,
"loss": 0.1677,
"step": 19300
},
{
"epoch": 1.7528348768918003,
"grad_norm": 1.7885472774505615,
"learning_rate": 3.0016459116176512e-05,
"loss": 0.1628,
"step": 19400
},
{
"epoch": 1.7618703410887733,
"grad_norm": 1.7749061584472656,
"learning_rate": 2.9799074562901772e-05,
"loss": 0.1623,
"step": 19500
},
{
"epoch": 1.7709058052857465,
"grad_norm": 1.933435320854187,
"learning_rate": 2.958169000962703e-05,
"loss": 0.1639,
"step": 19600
},
{
"epoch": 1.7799412694827197,
"grad_norm": 1.7979782819747925,
"learning_rate": 2.9364305456352285e-05,
"loss": 0.1581,
"step": 19700
},
{
"epoch": 1.788976733679693,
"grad_norm": 1.9905706644058228,
"learning_rate": 2.914692090307754e-05,
"loss": 0.1623,
"step": 19800
},
{
"epoch": 1.7980121978766659,
"grad_norm": 2.146162271499634,
"learning_rate": 2.8929536349802798e-05,
"loss": 0.1632,
"step": 19900
},
{
"epoch": 1.8070476620736389,
"grad_norm": 1.861401081085205,
"learning_rate": 2.8712151796528054e-05,
"loss": 0.1613,
"step": 20000
},
{
"epoch": 1.8070476620736389,
"eval_loss": 0.09824151545763016,
"eval_runtime": 87.8053,
"eval_samples_per_second": 48.551,
"eval_steps_per_second": 0.763,
"eval_wer": 0.10753373711614068,
"step": 20000
},
{
"epoch": 1.816083126270612,
"grad_norm": 1.8411866426467896,
"learning_rate": 2.849476724325331e-05,
"loss": 0.165,
"step": 20100
},
{
"epoch": 1.8251185904675853,
"grad_norm": 1.7575931549072266,
"learning_rate": 2.827738268997857e-05,
"loss": 0.1564,
"step": 20200
},
{
"epoch": 1.8341540546645585,
"grad_norm": 2.028254985809326,
"learning_rate": 2.8059998136703827e-05,
"loss": 0.1589,
"step": 20300
},
{
"epoch": 1.8431895188615315,
"grad_norm": 1.9810631275177002,
"learning_rate": 2.7842613583429083e-05,
"loss": 0.1586,
"step": 20400
},
{
"epoch": 1.8522249830585045,
"grad_norm": 1.8610142469406128,
"learning_rate": 2.7625229030154343e-05,
"loss": 0.1602,
"step": 20500
},
{
"epoch": 1.8612604472554777,
"grad_norm": 1.9897997379302979,
"learning_rate": 2.74078444768796e-05,
"loss": 0.1625,
"step": 20600
},
{
"epoch": 1.8702959114524509,
"grad_norm": 1.7494564056396484,
"learning_rate": 2.7190459923604856e-05,
"loss": 0.1593,
"step": 20700
},
{
"epoch": 1.879331375649424,
"grad_norm": 1.9486002922058105,
"learning_rate": 2.6975249215862856e-05,
"loss": 0.1595,
"step": 20800
},
{
"epoch": 1.888366839846397,
"grad_norm": 1.950518012046814,
"learning_rate": 2.6757864662588116e-05,
"loss": 0.1619,
"step": 20900
},
{
"epoch": 1.89740230404337,
"grad_norm": 1.9625803232192993,
"learning_rate": 2.6540480109313373e-05,
"loss": 0.1568,
"step": 21000
},
{
"epoch": 1.89740230404337,
"eval_loss": 0.09674616158008575,
"eval_runtime": 88.8971,
"eval_samples_per_second": 47.954,
"eval_steps_per_second": 0.754,
"eval_wer": 0.10868133035809159,
"step": 21000
},
{
"epoch": 1.9064377682403433,
"grad_norm": 1.7447710037231445,
"learning_rate": 2.632309555603863e-05,
"loss": 0.1566,
"step": 21100
},
{
"epoch": 1.9154732324373165,
"grad_norm": 2.0597004890441895,
"learning_rate": 2.610571100276389e-05,
"loss": 0.1594,
"step": 21200
},
{
"epoch": 1.9245086966342897,
"grad_norm": 2.045921802520752,
"learning_rate": 2.5888326449489145e-05,
"loss": 0.1592,
"step": 21300
},
{
"epoch": 1.9335441608312627,
"grad_norm": 1.9995648860931396,
"learning_rate": 2.56709418962144e-05,
"loss": 0.1591,
"step": 21400
},
{
"epoch": 1.9425796250282357,
"grad_norm": 1.765527367591858,
"learning_rate": 2.5455731188472406e-05,
"loss": 0.1578,
"step": 21500
},
{
"epoch": 1.9516150892252089,
"grad_norm": 1.8758126497268677,
"learning_rate": 2.5238346635197665e-05,
"loss": 0.1577,
"step": 21600
},
{
"epoch": 1.960650553422182,
"grad_norm": 1.770780324935913,
"learning_rate": 2.502096208192292e-05,
"loss": 0.1584,
"step": 21700
},
{
"epoch": 1.9696860176191553,
"grad_norm": 1.8630551099777222,
"learning_rate": 2.4803577528648175e-05,
"loss": 0.1548,
"step": 21800
},
{
"epoch": 1.9787214818161283,
"grad_norm": 1.8517158031463623,
"learning_rate": 2.458619297537343e-05,
"loss": 0.1593,
"step": 21900
},
{
"epoch": 1.9877569460131013,
"grad_norm": 1.6973580121994019,
"learning_rate": 2.436880842209869e-05,
"loss": 0.1525,
"step": 22000
},
{
"epoch": 1.9877569460131013,
"eval_loss": 0.0945153757929802,
"eval_runtime": 87.5175,
"eval_samples_per_second": 48.71,
"eval_steps_per_second": 0.766,
"eval_wer": 0.10449474019764106,
"step": 22000
},
{
"epoch": 1.9967924102100745,
"grad_norm": 2.0748767852783203,
"learning_rate": 2.4151423868823947e-05,
"loss": 0.1573,
"step": 22100
},
{
"epoch": 2.0057826970860626,
"grad_norm": 1.6151518821716309,
"learning_rate": 2.3934039315549204e-05,
"loss": 0.1241,
"step": 22200
},
{
"epoch": 2.014818161283036,
"grad_norm": 1.5904980897903442,
"learning_rate": 2.3716654762274464e-05,
"loss": 0.1074,
"step": 22300
},
{
"epoch": 2.023853625480009,
"grad_norm": 1.4857326745986938,
"learning_rate": 2.349927020899972e-05,
"loss": 0.1029,
"step": 22400
},
{
"epoch": 2.0328890896769822,
"grad_norm": 1.7787961959838867,
"learning_rate": 2.3281885655724976e-05,
"loss": 0.1066,
"step": 22500
},
{
"epoch": 2.0419245538739554,
"grad_norm": 1.6591817140579224,
"learning_rate": 2.3066674947982977e-05,
"loss": 0.1057,
"step": 22600
},
{
"epoch": 2.050960018070928,
"grad_norm": 1.6939488649368286,
"learning_rate": 2.2849290394708237e-05,
"loss": 0.1051,
"step": 22700
},
{
"epoch": 2.0599954822679014,
"grad_norm": 1.5981281995773315,
"learning_rate": 2.2631905841433493e-05,
"loss": 0.1036,
"step": 22800
},
{
"epoch": 2.0690309464648746,
"grad_norm": 1.8668162822723389,
"learning_rate": 2.241452128815875e-05,
"loss": 0.1063,
"step": 22900
},
{
"epoch": 2.078066410661848,
"grad_norm": 1.627382755279541,
"learning_rate": 2.219713673488401e-05,
"loss": 0.1063,
"step": 23000
},
{
"epoch": 2.078066410661848,
"eval_loss": 0.0966850146651268,
"eval_runtime": 88.5935,
"eval_samples_per_second": 48.119,
"eval_steps_per_second": 0.756,
"eval_wer": 0.10462225055785783,
"step": 23000
},
{
"epoch": 2.087101874858821,
"grad_norm": 1.6317180395126343,
"learning_rate": 2.1979752181609266e-05,
"loss": 0.1067,
"step": 23100
},
{
"epoch": 2.096137339055794,
"grad_norm": 1.5637694597244263,
"learning_rate": 2.1762367628334522e-05,
"loss": 0.1061,
"step": 23200
},
{
"epoch": 2.105172803252767,
"grad_norm": 1.561661720275879,
"learning_rate": 2.154498307505978e-05,
"loss": 0.1066,
"step": 23300
},
{
"epoch": 2.11420826744974,
"grad_norm": 1.570977807044983,
"learning_rate": 2.132759852178504e-05,
"loss": 0.1057,
"step": 23400
},
{
"epoch": 2.1232437316467134,
"grad_norm": 1.6354864835739136,
"learning_rate": 2.111021396851029e-05,
"loss": 0.1061,
"step": 23500
},
{
"epoch": 2.1322791958436866,
"grad_norm": 1.6001309156417847,
"learning_rate": 2.0892829415235548e-05,
"loss": 0.1038,
"step": 23600
},
{
"epoch": 2.1413146600406594,
"grad_norm": 1.7492948770523071,
"learning_rate": 2.0675444861960808e-05,
"loss": 0.1051,
"step": 23700
},
{
"epoch": 2.1503501242376326,
"grad_norm": 1.7432228326797485,
"learning_rate": 2.0458060308686064e-05,
"loss": 0.1029,
"step": 23800
},
{
"epoch": 2.159385588434606,
"grad_norm": 1.5974751710891724,
"learning_rate": 2.024067575541132e-05,
"loss": 0.1061,
"step": 23900
},
{
"epoch": 2.168421052631579,
"grad_norm": 1.8045574426651,
"learning_rate": 2.0023291202136577e-05,
"loss": 0.1075,
"step": 24000
},
{
"epoch": 2.168421052631579,
"eval_loss": 0.0951407328248024,
"eval_runtime": 88.9045,
"eval_samples_per_second": 47.95,
"eval_steps_per_second": 0.754,
"eval_wer": 0.10304962278185102,
"step": 24000
},
{
"epoch": 2.1774565168285522,
"grad_norm": 1.6032062768936157,
"learning_rate": 1.9805906648861836e-05,
"loss": 0.1065,
"step": 24100
},
{
"epoch": 2.186491981025525,
"grad_norm": 1.5442743301391602,
"learning_rate": 1.9588522095587093e-05,
"loss": 0.1063,
"step": 24200
},
{
"epoch": 2.195527445222498,
"grad_norm": 1.6346817016601562,
"learning_rate": 1.937113754231235e-05,
"loss": 0.1036,
"step": 24300
},
{
"epoch": 2.2045629094194714,
"grad_norm": 1.6535338163375854,
"learning_rate": 1.9153752989037606e-05,
"loss": 0.1051,
"step": 24400
},
{
"epoch": 2.2135983736164446,
"grad_norm": 1.6055641174316406,
"learning_rate": 1.8936368435762862e-05,
"loss": 0.1064,
"step": 24500
},
{
"epoch": 2.222633837813418,
"grad_norm": 1.936577558517456,
"learning_rate": 1.871898388248812e-05,
"loss": 0.1045,
"step": 24600
},
{
"epoch": 2.2316693020103906,
"grad_norm": 1.58518385887146,
"learning_rate": 1.8501599329213375e-05,
"loss": 0.1071,
"step": 24700
},
{
"epoch": 2.240704766207364,
"grad_norm": 1.73505437374115,
"learning_rate": 1.8284214775938635e-05,
"loss": 0.1065,
"step": 24800
},
{
"epoch": 2.249740230404337,
"grad_norm": 1.7908620834350586,
"learning_rate": 1.806683022266389e-05,
"loss": 0.1065,
"step": 24900
},
{
"epoch": 2.2587756946013102,
"grad_norm": 1.654637336730957,
"learning_rate": 1.7849445669389147e-05,
"loss": 0.1035,
"step": 25000
},
{
"epoch": 2.2587756946013102,
"eval_loss": 0.09359237551689148,
"eval_runtime": 90.6143,
"eval_samples_per_second": 47.046,
"eval_steps_per_second": 0.739,
"eval_wer": 0.10149824673254702,
"step": 25000
},
{
"epoch": 2.2678111587982834,
"grad_norm": 1.6015100479125977,
"learning_rate": 1.7632061116114407e-05,
"loss": 0.1062,
"step": 25100
},
{
"epoch": 2.276846622995256,
"grad_norm": 1.6547913551330566,
"learning_rate": 1.741467656283966e-05,
"loss": 0.1053,
"step": 25200
},
{
"epoch": 2.2858820871922294,
"grad_norm": 1.7010306119918823,
"learning_rate": 1.719729200956492e-05,
"loss": 0.1041,
"step": 25300
},
{
"epoch": 2.2949175513892026,
"grad_norm": 1.8139252662658691,
"learning_rate": 1.6979907456290176e-05,
"loss": 0.103,
"step": 25400
},
{
"epoch": 2.303953015586176,
"grad_norm": 1.6318985223770142,
"learning_rate": 1.6762522903015433e-05,
"loss": 0.104,
"step": 25500
},
{
"epoch": 2.312988479783149,
"grad_norm": 1.798727035522461,
"learning_rate": 1.654513834974069e-05,
"loss": 0.1055,
"step": 25600
},
{
"epoch": 2.322023943980122,
"grad_norm": 1.527917504310608,
"learning_rate": 1.6327753796465945e-05,
"loss": 0.106,
"step": 25700
},
{
"epoch": 2.331059408177095,
"grad_norm": 1.6333855390548706,
"learning_rate": 1.6110369243191205e-05,
"loss": 0.1024,
"step": 25800
},
{
"epoch": 2.340094872374068,
"grad_norm": 1.5563682317733765,
"learning_rate": 1.589298468991646e-05,
"loss": 0.1031,
"step": 25900
},
{
"epoch": 2.3491303365710414,
"grad_norm": 1.6106479167938232,
"learning_rate": 1.5675600136641718e-05,
"loss": 0.1056,
"step": 26000
},
{
"epoch": 2.3491303365710414,
"eval_loss": 0.09276529401540756,
"eval_runtime": 88.7242,
"eval_samples_per_second": 48.048,
"eval_steps_per_second": 0.755,
"eval_wer": 0.10132823291892466,
"step": 26000
},
{
"epoch": 2.3581658007680146,
"grad_norm": 1.8455883264541626,
"learning_rate": 1.5458215583366974e-05,
"loss": 0.1043,
"step": 26100
},
{
"epoch": 2.3672012649649874,
"grad_norm": 1.7726097106933594,
"learning_rate": 1.5240831030092233e-05,
"loss": 0.1015,
"step": 26200
},
{
"epoch": 2.3762367291619606,
"grad_norm": 1.6910566091537476,
"learning_rate": 1.5023446476817489e-05,
"loss": 0.1055,
"step": 26300
},
{
"epoch": 2.385272193358934,
"grad_norm": 1.642712116241455,
"learning_rate": 1.4806061923542747e-05,
"loss": 0.1027,
"step": 26400
},
{
"epoch": 2.394307657555907,
"grad_norm": 1.6066936254501343,
"learning_rate": 1.4588677370268002e-05,
"loss": 0.1052,
"step": 26500
},
{
"epoch": 2.4033431217528802,
"grad_norm": 1.7851406335830688,
"learning_rate": 1.437129281699326e-05,
"loss": 0.1029,
"step": 26600
},
{
"epoch": 2.412378585949853,
"grad_norm": 1.9918655157089233,
"learning_rate": 1.4153908263718516e-05,
"loss": 0.1006,
"step": 26700
},
{
"epoch": 2.421414050146826,
"grad_norm": 1.6415534019470215,
"learning_rate": 1.3936523710443774e-05,
"loss": 0.1038,
"step": 26800
},
{
"epoch": 2.4304495143437994,
"grad_norm": 1.9253250360488892,
"learning_rate": 1.3719139157169032e-05,
"loss": 0.1024,
"step": 26900
},
{
"epoch": 2.4394849785407726,
"grad_norm": 1.86326265335083,
"learning_rate": 1.3501754603894287e-05,
"loss": 0.1019,
"step": 27000
},
{
"epoch": 2.4394849785407726,
"eval_loss": 0.09212099760770798,
"eval_runtime": 88.0292,
"eval_samples_per_second": 48.427,
"eval_steps_per_second": 0.761,
"eval_wer": 0.1000106258633514,
"step": 27000
},
{
"epoch": 2.448520442737746,
"grad_norm": 1.7671024799346924,
"learning_rate": 1.3284370050619545e-05,
"loss": 0.1026,
"step": 27100
},
{
"epoch": 2.4575559069347186,
"grad_norm": 1.7686715126037598,
"learning_rate": 1.3066985497344802e-05,
"loss": 0.1041,
"step": 27200
},
{
"epoch": 2.466591371131692,
"grad_norm": 1.743655800819397,
"learning_rate": 1.284960094407006e-05,
"loss": 0.099,
"step": 27300
},
{
"epoch": 2.475626835328665,
"grad_norm": 1.7912476062774658,
"learning_rate": 1.2632216390795314e-05,
"loss": 0.1034,
"step": 27400
},
{
"epoch": 2.484662299525638,
"grad_norm": 1.5481427907943726,
"learning_rate": 1.2414831837520572e-05,
"loss": 0.1037,
"step": 27500
},
{
"epoch": 2.4936977637226114,
"grad_norm": 1.5013809204101562,
"learning_rate": 1.219744728424583e-05,
"loss": 0.1028,
"step": 27600
},
{
"epoch": 2.5027332279195846,
"grad_norm": 1.592502236366272,
"learning_rate": 1.1980062730971087e-05,
"loss": 0.1024,
"step": 27700
},
{
"epoch": 2.5117686921165574,
"grad_norm": 1.6279585361480713,
"learning_rate": 1.1762678177696345e-05,
"loss": 0.1017,
"step": 27800
},
{
"epoch": 2.5208041563135306,
"grad_norm": 1.718693733215332,
"learning_rate": 1.15452936244216e-05,
"loss": 0.0991,
"step": 27900
},
{
"epoch": 2.529839620510504,
"grad_norm": 1.721211314201355,
"learning_rate": 1.1327909071146858e-05,
"loss": 0.1004,
"step": 28000
},
{
"epoch": 2.529839620510504,
"eval_loss": 0.0911058560013771,
"eval_runtime": 86.9208,
"eval_samples_per_second": 49.045,
"eval_steps_per_second": 0.771,
"eval_wer": 0.09856550844756136,
"step": 28000
},
{
"epoch": 2.5388750847074766,
"grad_norm": 1.708903193473816,
"learning_rate": 1.1110524517872116e-05,
"loss": 0.1032,
"step": 28100
},
{
"epoch": 2.54791054890445,
"grad_norm": 1.6191095113754272,
"learning_rate": 1.0893139964597372e-05,
"loss": 0.1031,
"step": 28200
},
{
"epoch": 2.556946013101423,
"grad_norm": 1.5952250957489014,
"learning_rate": 1.0677929256855375e-05,
"loss": 0.0991,
"step": 28300
},
{
"epoch": 2.565981477298396,
"grad_norm": 1.8054704666137695,
"learning_rate": 1.0460544703580633e-05,
"loss": 0.0994,
"step": 28400
},
{
"epoch": 2.5750169414953694,
"grad_norm": 1.4976806640625,
"learning_rate": 1.024316015030589e-05,
"loss": 0.0988,
"step": 28500
},
{
"epoch": 2.5840524056923426,
"grad_norm": 1.6461458206176758,
"learning_rate": 1.0025775597031147e-05,
"loss": 0.0989,
"step": 28600
},
{
"epoch": 2.593087869889316,
"grad_norm": 1.631536841392517,
"learning_rate": 9.808391043756405e-06,
"loss": 0.1001,
"step": 28700
},
{
"epoch": 2.6021233340862886,
"grad_norm": 1.8152861595153809,
"learning_rate": 9.59100649048166e-06,
"loss": 0.1001,
"step": 28800
},
{
"epoch": 2.611158798283262,
"grad_norm": 1.4996885061264038,
"learning_rate": 9.373621937206918e-06,
"loss": 0.103,
"step": 28900
},
{
"epoch": 2.620194262480235,
"grad_norm": 1.8811280727386475,
"learning_rate": 9.156237383932176e-06,
"loss": 0.0992,
"step": 29000
},
{
"epoch": 2.620194262480235,
"eval_loss": 0.09040974825620651,
"eval_runtime": 87.3549,
"eval_samples_per_second": 48.801,
"eval_steps_per_second": 0.767,
"eval_wer": 0.0979917118265859,
"step": 29000
},
{
"epoch": 2.629229726677208,
"grad_norm": 1.550436019897461,
"learning_rate": 8.938852830657433e-06,
"loss": 0.0997,
"step": 29100
},
{
"epoch": 2.638265190874181,
"grad_norm": 1.7116386890411377,
"learning_rate": 8.721468277382689e-06,
"loss": 0.1021,
"step": 29200
},
{
"epoch": 2.647300655071154,
"grad_norm": 1.8250106573104858,
"learning_rate": 8.504083724107947e-06,
"loss": 0.0992,
"step": 29300
},
{
"epoch": 2.6563361192681274,
"grad_norm": 1.704163670539856,
"learning_rate": 8.286699170833203e-06,
"loss": 0.0974,
"step": 29400
},
{
"epoch": 2.6653715834651006,
"grad_norm": 1.7405962944030762,
"learning_rate": 8.06931461755846e-06,
"loss": 0.0997,
"step": 29500
},
{
"epoch": 2.674407047662074,
"grad_norm": 1.599592685699463,
"learning_rate": 7.851930064283716e-06,
"loss": 0.0978,
"step": 29600
},
{
"epoch": 2.683442511859047,
"grad_norm": 1.666237711906433,
"learning_rate": 7.634545511008974e-06,
"loss": 0.0986,
"step": 29700
},
{
"epoch": 2.69247797605602,
"grad_norm": 1.6730016469955444,
"learning_rate": 7.417160957734231e-06,
"loss": 0.0958,
"step": 29800
},
{
"epoch": 2.701513440252993,
"grad_norm": 1.800661325454712,
"learning_rate": 7.199776404459488e-06,
"loss": 0.0967,
"step": 29900
},
{
"epoch": 2.710548904449966,
"grad_norm": 1.4267141819000244,
"learning_rate": 6.982391851184745e-06,
"loss": 0.1011,
"step": 30000
},
{
"epoch": 2.710548904449966,
"eval_loss": 0.08978110551834106,
"eval_runtime": 92.3004,
"eval_samples_per_second": 46.186,
"eval_steps_per_second": 0.726,
"eval_wer": 0.09784294973966635,
"step": 30000
},
{
"epoch": 2.719584368646939,
"grad_norm": 1.7578014135360718,
"learning_rate": 6.765007297910002e-06,
"loss": 0.0988,
"step": 30100
},
{
"epoch": 2.728619832843912,
"grad_norm": 1.747879981994629,
"learning_rate": 6.54762274463526e-06,
"loss": 0.0982,
"step": 30200
},
{
"epoch": 2.7376552970408854,
"grad_norm": 1.4880852699279785,
"learning_rate": 6.330238191360516e-06,
"loss": 0.0944,
"step": 30300
},
{
"epoch": 2.7466907612378586,
"grad_norm": 1.6102066040039062,
"learning_rate": 6.112853638085773e-06,
"loss": 0.099,
"step": 30400
},
{
"epoch": 2.755726225434832,
"grad_norm": 2.1802284717559814,
"learning_rate": 5.89546908481103e-06,
"loss": 0.0963,
"step": 30500
},
{
"epoch": 2.764761689631805,
"grad_norm": 1.65652334690094,
"learning_rate": 5.680258377069035e-06,
"loss": 0.099,
"step": 30600
},
{
"epoch": 2.7737971538287782,
"grad_norm": 1.344401240348816,
"learning_rate": 5.462873823794291e-06,
"loss": 0.0979,
"step": 30700
},
{
"epoch": 2.782832618025751,
"grad_norm": 1.6446696519851685,
"learning_rate": 5.245489270519548e-06,
"loss": 0.0944,
"step": 30800
},
{
"epoch": 2.791868082222724,
"grad_norm": 1.529815435409546,
"learning_rate": 5.028104717244806e-06,
"loss": 0.0967,
"step": 30900
},
{
"epoch": 2.8009035464196974,
"grad_norm": 1.7729915380477905,
"learning_rate": 4.810720163970063e-06,
"loss": 0.095,
"step": 31000
},
{
"epoch": 2.8009035464196974,
"eval_loss": 0.08919844031333923,
"eval_runtime": 90.4055,
"eval_samples_per_second": 47.154,
"eval_steps_per_second": 0.741,
"eval_wer": 0.09748167038571884,
"step": 31000
},
{
"epoch": 2.80993901061667,
"grad_norm": 1.6226630210876465,
"learning_rate": 4.59333561069532e-06,
"loss": 0.0982,
"step": 31100
},
{
"epoch": 2.8189744748136434,
"grad_norm": 1.5628806352615356,
"learning_rate": 4.375951057420576e-06,
"loss": 0.095,
"step": 31200
},
{
"epoch": 2.8280099390106166,
"grad_norm": 1.5284922122955322,
"learning_rate": 4.158566504145834e-06,
"loss": 0.0945,
"step": 31300
},
{
"epoch": 2.83704540320759,
"grad_norm": 1.9399908781051636,
"learning_rate": 3.941181950871091e-06,
"loss": 0.0954,
"step": 31400
},
{
"epoch": 2.846080867404563,
"grad_norm": 1.7431321144104004,
"learning_rate": 3.7237973975963476e-06,
"loss": 0.0973,
"step": 31500
},
{
"epoch": 2.855116331601536,
"grad_norm": 1.4165501594543457,
"learning_rate": 3.5064128443216044e-06,
"loss": 0.0954,
"step": 31600
},
{
"epoch": 2.8641517957985094,
"grad_norm": 1.8231940269470215,
"learning_rate": 3.2890282910468617e-06,
"loss": 0.0969,
"step": 31700
},
{
"epoch": 2.873187259995482,
"grad_norm": 1.9092686176300049,
"learning_rate": 3.0716437377721185e-06,
"loss": 0.0967,
"step": 31800
},
{
"epoch": 2.8822227241924554,
"grad_norm": 1.6101560592651367,
"learning_rate": 2.8542591844973753e-06,
"loss": 0.0973,
"step": 31900
},
{
"epoch": 2.8912581883894286,
"grad_norm": 1.6077231168746948,
"learning_rate": 2.636874631222633e-06,
"loss": 0.0975,
"step": 32000
},
{
"epoch": 2.8912581883894286,
"eval_loss": 0.08852633088827133,
"eval_runtime": 88.8694,
"eval_samples_per_second": 47.969,
"eval_steps_per_second": 0.754,
"eval_wer": 0.096015301243226,
"step": 32000
},
{
"epoch": 2.9002936525864014,
"grad_norm": 1.6472060680389404,
"learning_rate": 2.4194900779478898e-06,
"loss": 0.0953,
"step": 32100
},
{
"epoch": 2.9093291167833746,
"grad_norm": 1.5193005800247192,
"learning_rate": 2.2021055246731466e-06,
"loss": 0.0947,
"step": 32200
},
{
"epoch": 2.918364580980348,
"grad_norm": 1.3484536409378052,
"learning_rate": 1.984720971398404e-06,
"loss": 0.0937,
"step": 32300
},
{
"epoch": 2.927400045177321,
"grad_norm": 1.6725506782531738,
"learning_rate": 1.7673364181236606e-06,
"loss": 0.0949,
"step": 32400
},
{
"epoch": 2.936435509374294,
"grad_norm": 1.5670363903045654,
"learning_rate": 1.5499518648489175e-06,
"loss": 0.0928,
"step": 32500
},
{
"epoch": 2.9454709735712674,
"grad_norm": 1.5655218362808228,
"learning_rate": 1.3325673115741747e-06,
"loss": 0.0923,
"step": 32600
},
{
"epoch": 2.95450643776824,
"grad_norm": 1.7287861108779907,
"learning_rate": 1.1151827582994317e-06,
"loss": 0.0945,
"step": 32700
},
{
"epoch": 2.9635419019652134,
"grad_norm": 1.5101486444473267,
"learning_rate": 8.977982050246885e-07,
"loss": 0.0938,
"step": 32800
},
{
"epoch": 2.9725773661621866,
"grad_norm": 1.4109468460083008,
"learning_rate": 6.804136517499457e-07,
"loss": 0.091,
"step": 32900
},
{
"epoch": 2.98161283035916,
"grad_norm": 1.537053108215332,
"learning_rate": 4.6302909847520263e-07,
"loss": 0.0963,
"step": 33000
},
{
"epoch": 2.98161283035916,
"eval_loss": 0.08801376074552536,
"eval_runtime": 90.4227,
"eval_samples_per_second": 47.145,
"eval_steps_per_second": 0.741,
"eval_wer": 0.09624907023695675,
"step": 33000
},
{
"epoch": 2.9906482945561326,
"grad_norm": 1.575260043144226,
"learning_rate": 2.47818390733207e-07,
"loss": 0.0934,
"step": 33100
},
{
"epoch": 2.9996837587531058,
"grad_norm": 1.5032224655151367,
"learning_rate": 3.043383745846402e-08,
"loss": 0.0941,
"step": 33200
},
{
"epoch": 2.999774113395076,
"step": 33201,
"total_flos": 2.756290459511145e+20,
"train_loss": 0.20740618139383307,
"train_runtime": 51184.2268,
"train_samples_per_second": 83.03,
"train_steps_per_second": 0.649
}
],
"logging_steps": 100,
"max_steps": 33201,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.756290459511145e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}