{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9771827820393805,
"eval_steps": 500,
"global_step": 100000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009771827820393806,
"grad_norm": 0.5417118072509766,
"learning_rate": 4.995602247740044e-05,
"loss": 1.378,
"step": 100
},
{
"epoch": 0.001954365564078761,
"grad_norm": 0.6493918895721436,
"learning_rate": 4.990715856340093e-05,
"loss": 1.3304,
"step": 200
},
{
"epoch": 0.0029315483461181415,
"grad_norm": 0.9062462449073792,
"learning_rate": 4.9858294649401425e-05,
"loss": 1.3284,
"step": 300
},
{
"epoch": 0.003908731128157522,
"grad_norm": 0.750052273273468,
"learning_rate": 4.9809430735401906e-05,
"loss": 1.3166,
"step": 400
},
{
"epoch": 0.004885913910196903,
"grad_norm": 0.6602022051811218,
"learning_rate": 4.97605668214024e-05,
"loss": 1.3166,
"step": 500
},
{
"epoch": 0.005863096692236283,
"grad_norm": 0.4193927049636841,
"learning_rate": 4.971170290740288e-05,
"loss": 1.3098,
"step": 600
},
{
"epoch": 0.006840279474275663,
"grad_norm": 0.6095415949821472,
"learning_rate": 4.966283899340338e-05,
"loss": 1.3103,
"step": 700
},
{
"epoch": 0.007817462256315045,
"grad_norm": 0.9943467378616333,
"learning_rate": 4.9613975079403865e-05,
"loss": 1.3096,
"step": 800
},
{
"epoch": 0.008794645038354424,
"grad_norm": 1.2263585329055786,
"learning_rate": 4.9565111165404346e-05,
"loss": 1.3067,
"step": 900
},
{
"epoch": 0.009771827820393805,
"grad_norm": 0.7198677659034729,
"learning_rate": 4.951624725140484e-05,
"loss": 1.3041,
"step": 1000
},
{
"epoch": 0.010749010602433185,
"grad_norm": 0.7370775938034058,
"learning_rate": 4.946738333740533e-05,
"loss": 1.302,
"step": 1100
},
{
"epoch": 0.011726193384472566,
"grad_norm": 0.5109437704086304,
"learning_rate": 4.941851942340582e-05,
"loss": 1.3089,
"step": 1200
},
{
"epoch": 0.012703376166511945,
"grad_norm": 0.1879555583000183,
"learning_rate": 4.9369655509406305e-05,
"loss": 1.3043,
"step": 1300
},
{
"epoch": 0.013680558948551327,
"grad_norm": 0.951046884059906,
"learning_rate": 4.932079159540679e-05,
"loss": 1.3098,
"step": 1400
},
{
"epoch": 0.014657741730590706,
"grad_norm": 0.2478829026222229,
"learning_rate": 4.927192768140728e-05,
"loss": 1.3026,
"step": 1500
},
{
"epoch": 0.01563492451263009,
"grad_norm": 0.5585843324661255,
"learning_rate": 4.9223063767407776e-05,
"loss": 1.3014,
"step": 1600
},
{
"epoch": 0.016612107294669467,
"grad_norm": 0.48532453179359436,
"learning_rate": 4.917419985340826e-05,
"loss": 1.2981,
"step": 1700
},
{
"epoch": 0.017589290076708848,
"grad_norm": 0.4233573079109192,
"learning_rate": 4.912533593940875e-05,
"loss": 1.2992,
"step": 1800
},
{
"epoch": 0.01856647285874823,
"grad_norm": 0.3272475600242615,
"learning_rate": 4.9076472025409234e-05,
"loss": 1.292,
"step": 1900
},
{
"epoch": 0.01954365564078761,
"grad_norm": 0.5299385786056519,
"learning_rate": 4.902760811140973e-05,
"loss": 1.2963,
"step": 2000
},
{
"epoch": 0.02052083842282699,
"grad_norm": 0.1614024043083191,
"learning_rate": 4.8978744197410216e-05,
"loss": 1.2945,
"step": 2100
},
{
"epoch": 0.02149802120486637,
"grad_norm": 0.6039963960647583,
"learning_rate": 4.8929880283410705e-05,
"loss": 1.2913,
"step": 2200
},
{
"epoch": 0.02247520398690575,
"grad_norm": 0.5772804021835327,
"learning_rate": 4.888101636941119e-05,
"loss": 1.2895,
"step": 2300
},
{
"epoch": 0.023452386768945132,
"grad_norm": 0.7489622235298157,
"learning_rate": 4.883215245541168e-05,
"loss": 1.2847,
"step": 2400
},
{
"epoch": 0.024429569550984513,
"grad_norm": 0.30208253860473633,
"learning_rate": 4.878328854141217e-05,
"loss": 1.2924,
"step": 2500
},
{
"epoch": 0.02540675233302389,
"grad_norm": 0.36944472789764404,
"learning_rate": 4.873442462741266e-05,
"loss": 1.2916,
"step": 2600
},
{
"epoch": 0.026383935115063272,
"grad_norm": 0.3268676698207855,
"learning_rate": 4.8685560713413145e-05,
"loss": 1.2893,
"step": 2700
},
{
"epoch": 0.027361117897102653,
"grad_norm": 0.2795974910259247,
"learning_rate": 4.863669679941363e-05,
"loss": 1.282,
"step": 2800
},
{
"epoch": 0.028338300679142035,
"grad_norm": 0.36298853158950806,
"learning_rate": 4.858783288541413e-05,
"loss": 1.2832,
"step": 2900
},
{
"epoch": 0.029315483461181412,
"grad_norm": 0.5242423415184021,
"learning_rate": 4.853896897141461e-05,
"loss": 1.2819,
"step": 3000
},
{
"epoch": 0.030292666243220794,
"grad_norm": 0.25340864062309265,
"learning_rate": 4.8490105057415104e-05,
"loss": 1.2809,
"step": 3100
},
{
"epoch": 0.03126984902526018,
"grad_norm": 0.7241976261138916,
"learning_rate": 4.844124114341559e-05,
"loss": 1.2802,
"step": 3200
},
{
"epoch": 0.032247031807299556,
"grad_norm": 0.5154001712799072,
"learning_rate": 4.839237722941608e-05,
"loss": 1.2748,
"step": 3300
},
{
"epoch": 0.033224214589338934,
"grad_norm": 0.5323473811149597,
"learning_rate": 4.834351331541657e-05,
"loss": 1.284,
"step": 3400
},
{
"epoch": 0.03420139737137832,
"grad_norm": 0.3947168290615082,
"learning_rate": 4.8294649401417056e-05,
"loss": 1.276,
"step": 3500
},
{
"epoch": 0.035178580153417696,
"grad_norm": 0.4776057302951813,
"learning_rate": 4.8245785487417544e-05,
"loss": 1.2783,
"step": 3600
},
{
"epoch": 0.036155762935457074,
"grad_norm": 0.4884164035320282,
"learning_rate": 4.819692157341804e-05,
"loss": 1.2745,
"step": 3700
},
{
"epoch": 0.03713294571749646,
"grad_norm": 0.5210428833961487,
"learning_rate": 4.814805765941852e-05,
"loss": 1.2707,
"step": 3800
},
{
"epoch": 0.038110128499535836,
"grad_norm": 0.46214359998703003,
"learning_rate": 4.809919374541901e-05,
"loss": 1.2727,
"step": 3900
},
{
"epoch": 0.03908731128157522,
"grad_norm": 0.2656782865524292,
"learning_rate": 4.8050329831419496e-05,
"loss": 1.2694,
"step": 4000
},
{
"epoch": 0.0400644940636146,
"grad_norm": 0.4923059940338135,
"learning_rate": 4.8001465917419985e-05,
"loss": 1.2665,
"step": 4100
},
{
"epoch": 0.04104167684565398,
"grad_norm": 0.92928147315979,
"learning_rate": 4.795260200342048e-05,
"loss": 1.2627,
"step": 4200
},
{
"epoch": 0.04201885962769336,
"grad_norm": 1.0651229619979858,
"learning_rate": 4.790373808942096e-05,
"loss": 1.2623,
"step": 4300
},
{
"epoch": 0.04299604240973274,
"grad_norm": 0.9612557888031006,
"learning_rate": 4.7854874175421456e-05,
"loss": 1.2482,
"step": 4400
},
{
"epoch": 0.043973225191772124,
"grad_norm": 1.0120874643325806,
"learning_rate": 4.7806010261421944e-05,
"loss": 1.2589,
"step": 4500
},
{
"epoch": 0.0449504079738115,
"grad_norm": 0.6250020861625671,
"learning_rate": 4.775714634742243e-05,
"loss": 1.2499,
"step": 4600
},
{
"epoch": 0.04592759075585088,
"grad_norm": 0.2850038707256317,
"learning_rate": 4.770828243342292e-05,
"loss": 1.2446,
"step": 4700
},
{
"epoch": 0.046904773537890264,
"grad_norm": 1.2032625675201416,
"learning_rate": 4.765941851942341e-05,
"loss": 1.2238,
"step": 4800
},
{
"epoch": 0.04788195631992964,
"grad_norm": 0.42024949193000793,
"learning_rate": 4.7610554605423896e-05,
"loss": 1.2255,
"step": 4900
},
{
"epoch": 0.048859139101969026,
"grad_norm": 0.7451406121253967,
"learning_rate": 4.756169069142439e-05,
"loss": 1.2071,
"step": 5000
},
{
"epoch": 0.049836321884008404,
"grad_norm": 0.8735096454620361,
"learning_rate": 4.751282677742487e-05,
"loss": 1.2126,
"step": 5100
},
{
"epoch": 0.05081350466604778,
"grad_norm": 0.73675137758255,
"learning_rate": 4.746396286342537e-05,
"loss": 1.2036,
"step": 5200
},
{
"epoch": 0.051790687448087167,
"grad_norm": 0.6540606617927551,
"learning_rate": 4.741509894942585e-05,
"loss": 1.1825,
"step": 5300
},
{
"epoch": 0.052767870230126544,
"grad_norm": 0.825066864490509,
"learning_rate": 4.7366235035426336e-05,
"loss": 1.1655,
"step": 5400
},
{
"epoch": 0.05374505301216593,
"grad_norm": 1.6421219110488892,
"learning_rate": 4.731737112142683e-05,
"loss": 1.1716,
"step": 5500
},
{
"epoch": 0.05472223579420531,
"grad_norm": 1.0644057989120483,
"learning_rate": 4.726850720742731e-05,
"loss": 1.1384,
"step": 5600
},
{
"epoch": 0.055699418576244684,
"grad_norm": 1.1611616611480713,
"learning_rate": 4.721964329342781e-05,
"loss": 1.1499,
"step": 5700
},
{
"epoch": 0.05667660135828407,
"grad_norm": 2.0900723934173584,
"learning_rate": 4.7170779379428295e-05,
"loss": 1.1323,
"step": 5800
},
{
"epoch": 0.05765378414032345,
"grad_norm": 1.0580404996871948,
"learning_rate": 4.712191546542878e-05,
"loss": 1.112,
"step": 5900
},
{
"epoch": 0.058630966922362825,
"grad_norm": 0.6299407482147217,
"learning_rate": 4.707305155142927e-05,
"loss": 1.104,
"step": 6000
},
{
"epoch": 0.05960814970440221,
"grad_norm": 0.6816271543502808,
"learning_rate": 4.702418763742976e-05,
"loss": 1.1128,
"step": 6100
},
{
"epoch": 0.06058533248644159,
"grad_norm": 0.654796302318573,
"learning_rate": 4.697532372343025e-05,
"loss": 1.0942,
"step": 6200
},
{
"epoch": 0.06156251526848097,
"grad_norm": 1.0433884859085083,
"learning_rate": 4.692645980943074e-05,
"loss": 1.0862,
"step": 6300
},
{
"epoch": 0.06253969805052036,
"grad_norm": 0.6256537437438965,
"learning_rate": 4.6877595895431224e-05,
"loss": 1.081,
"step": 6400
},
{
"epoch": 0.06351688083255973,
"grad_norm": 0.8173975348472595,
"learning_rate": 4.682873198143172e-05,
"loss": 1.0767,
"step": 6500
},
{
"epoch": 0.06449406361459911,
"grad_norm": 0.7856473922729492,
"learning_rate": 4.6779868067432206e-05,
"loss": 1.0767,
"step": 6600
},
{
"epoch": 0.0654712463966385,
"grad_norm": 0.6337741017341614,
"learning_rate": 4.6731004153432695e-05,
"loss": 1.0829,
"step": 6700
},
{
"epoch": 0.06644842917867787,
"grad_norm": 0.5813809037208557,
"learning_rate": 4.668214023943318e-05,
"loss": 1.0571,
"step": 6800
},
{
"epoch": 0.06742561196071725,
"grad_norm": 0.4155445992946625,
"learning_rate": 4.6633276325433664e-05,
"loss": 1.0707,
"step": 6900
},
{
"epoch": 0.06840279474275664,
"grad_norm": 0.6730567812919617,
"learning_rate": 4.658441241143416e-05,
"loss": 1.0477,
"step": 7000
},
{
"epoch": 0.06937997752479601,
"grad_norm": 0.8348300457000732,
"learning_rate": 4.653554849743465e-05,
"loss": 1.0644,
"step": 7100
},
{
"epoch": 0.07035716030683539,
"grad_norm": 2.2414326667785645,
"learning_rate": 4.6486684583435135e-05,
"loss": 1.0577,
"step": 7200
},
{
"epoch": 0.07133434308887478,
"grad_norm": 1.6573911905288696,
"learning_rate": 4.643782066943562e-05,
"loss": 1.0836,
"step": 7300
},
{
"epoch": 0.07231152587091415,
"grad_norm": 0.5690039396286011,
"learning_rate": 4.638895675543611e-05,
"loss": 1.0541,
"step": 7400
},
{
"epoch": 0.07328870865295353,
"grad_norm": 0.527215301990509,
"learning_rate": 4.63400928414366e-05,
"loss": 1.0164,
"step": 7500
},
{
"epoch": 0.07426589143499292,
"grad_norm": 0.7997362613677979,
"learning_rate": 4.6291228927437094e-05,
"loss": 1.0447,
"step": 7600
},
{
"epoch": 0.0752430742170323,
"grad_norm": 2.257143259048462,
"learning_rate": 4.6242365013437575e-05,
"loss": 1.0365,
"step": 7700
},
{
"epoch": 0.07622025699907167,
"grad_norm": 0.9132490158081055,
"learning_rate": 4.619350109943807e-05,
"loss": 1.0498,
"step": 7800
},
{
"epoch": 0.07719743978111106,
"grad_norm": 0.5229859948158264,
"learning_rate": 4.614463718543856e-05,
"loss": 1.0342,
"step": 7900
},
{
"epoch": 0.07817462256315044,
"grad_norm": 0.6948792338371277,
"learning_rate": 4.6095773271439046e-05,
"loss": 1.0325,
"step": 8000
},
{
"epoch": 0.07915180534518981,
"grad_norm": 0.8526360988616943,
"learning_rate": 4.6046909357439534e-05,
"loss": 1.0183,
"step": 8100
},
{
"epoch": 0.0801289881272292,
"grad_norm": 1.1457374095916748,
"learning_rate": 4.599804544344002e-05,
"loss": 1.0243,
"step": 8200
},
{
"epoch": 0.08110617090926858,
"grad_norm": 0.9335997700691223,
"learning_rate": 4.594918152944051e-05,
"loss": 1.046,
"step": 8300
},
{
"epoch": 0.08208335369130795,
"grad_norm": 0.8367229700088501,
"learning_rate": 4.5900317615441e-05,
"loss": 1.0176,
"step": 8400
},
{
"epoch": 0.08306053647334734,
"grad_norm": 3.7648801803588867,
"learning_rate": 4.5851453701441486e-05,
"loss": 1.0047,
"step": 8500
},
{
"epoch": 0.08403771925538672,
"grad_norm": 0.5877612829208374,
"learning_rate": 4.5802589787441975e-05,
"loss": 1.0346,
"step": 8600
},
{
"epoch": 0.08501490203742611,
"grad_norm": 0.5145990252494812,
"learning_rate": 4.575372587344246e-05,
"loss": 1.0268,
"step": 8700
},
{
"epoch": 0.08599208481946548,
"grad_norm": 0.9310688376426697,
"learning_rate": 4.570486195944295e-05,
"loss": 1.0109,
"step": 8800
},
{
"epoch": 0.08696926760150486,
"grad_norm": 0.5182886719703674,
"learning_rate": 4.5655998045443445e-05,
"loss": 1.0117,
"step": 8900
},
{
"epoch": 0.08794645038354425,
"grad_norm": 0.4319695234298706,
"learning_rate": 4.560713413144393e-05,
"loss": 1.0053,
"step": 9000
},
{
"epoch": 0.08892363316558362,
"grad_norm": 4.307732582092285,
"learning_rate": 4.555827021744442e-05,
"loss": 1.0151,
"step": 9100
},
{
"epoch": 0.089900815947623,
"grad_norm": 0.46516236662864685,
"learning_rate": 4.550940630344491e-05,
"loss": 0.9945,
"step": 9200
},
{
"epoch": 0.09087799872966239,
"grad_norm": 1.2372952699661255,
"learning_rate": 4.54605423894454e-05,
"loss": 0.9865,
"step": 9300
},
{
"epoch": 0.09185518151170176,
"grad_norm": 0.7494595646858215,
"learning_rate": 4.5411678475445886e-05,
"loss": 0.9824,
"step": 9400
},
{
"epoch": 0.09283236429374114,
"grad_norm": 0.5540333390235901,
"learning_rate": 4.5362814561446374e-05,
"loss": 1.0132,
"step": 9500
},
{
"epoch": 0.09380954707578053,
"grad_norm": 0.48533427715301514,
"learning_rate": 4.531395064744686e-05,
"loss": 1.0173,
"step": 9600
},
{
"epoch": 0.0947867298578199,
"grad_norm": 0.4972572922706604,
"learning_rate": 4.526508673344736e-05,
"loss": 1.0078,
"step": 9700
},
{
"epoch": 0.09576391263985928,
"grad_norm": 0.6748878955841064,
"learning_rate": 4.521622281944784e-05,
"loss": 1.0172,
"step": 9800
},
{
"epoch": 0.09674109542189867,
"grad_norm": 0.5261876583099365,
"learning_rate": 4.5167358905448326e-05,
"loss": 1.0189,
"step": 9900
},
{
"epoch": 0.09771827820393805,
"grad_norm": 0.4164600670337677,
"learning_rate": 4.5118494991448814e-05,
"loss": 0.9978,
"step": 10000
},
{
"epoch": 0.09869546098597742,
"grad_norm": 0.40417763590812683,
"learning_rate": 4.50696310774493e-05,
"loss": 1.0103,
"step": 10100
},
{
"epoch": 0.09967264376801681,
"grad_norm": 0.8591890931129456,
"learning_rate": 4.50207671634498e-05,
"loss": 1.0065,
"step": 10200
},
{
"epoch": 0.10064982655005619,
"grad_norm": 0.5676371455192566,
"learning_rate": 4.497190324945028e-05,
"loss": 1.0089,
"step": 10300
},
{
"epoch": 0.10162700933209556,
"grad_norm": 0.616646945476532,
"learning_rate": 4.492303933545077e-05,
"loss": 0.9897,
"step": 10400
},
{
"epoch": 0.10260419211413495,
"grad_norm": 0.37536484003067017,
"learning_rate": 4.487417542145126e-05,
"loss": 0.9989,
"step": 10500
},
{
"epoch": 0.10358137489617433,
"grad_norm": 0.6801789402961731,
"learning_rate": 4.482531150745175e-05,
"loss": 0.9923,
"step": 10600
},
{
"epoch": 0.1045585576782137,
"grad_norm": 0.5848776698112488,
"learning_rate": 4.477644759345224e-05,
"loss": 0.9919,
"step": 10700
},
{
"epoch": 0.10553574046025309,
"grad_norm": 0.7715157866477966,
"learning_rate": 4.4727583679452725e-05,
"loss": 0.9814,
"step": 10800
},
{
"epoch": 0.10651292324229247,
"grad_norm": 0.8080986142158508,
"learning_rate": 4.4678719765453214e-05,
"loss": 0.9935,
"step": 10900
},
{
"epoch": 0.10749010602433186,
"grad_norm": 0.4375016391277313,
"learning_rate": 4.462985585145371e-05,
"loss": 0.988,
"step": 11000
},
{
"epoch": 0.10846728880637123,
"grad_norm": 0.8055805563926697,
"learning_rate": 4.458099193745419e-05,
"loss": 0.9861,
"step": 11100
},
{
"epoch": 0.10944447158841061,
"grad_norm": 1.1914618015289307,
"learning_rate": 4.4532128023454685e-05,
"loss": 0.9622,
"step": 11200
},
{
"epoch": 0.11042165437045,
"grad_norm": 0.4247540533542633,
"learning_rate": 4.448326410945517e-05,
"loss": 0.9602,
"step": 11300
},
{
"epoch": 0.11139883715248937,
"grad_norm": 0.5454650521278381,
"learning_rate": 4.4434400195455654e-05,
"loss": 0.9696,
"step": 11400
},
{
"epoch": 0.11237601993452875,
"grad_norm": 0.5259748697280884,
"learning_rate": 4.438553628145615e-05,
"loss": 1.0021,
"step": 11500
},
{
"epoch": 0.11335320271656814,
"grad_norm": 0.5165246725082397,
"learning_rate": 4.433667236745663e-05,
"loss": 0.982,
"step": 11600
},
{
"epoch": 0.11433038549860751,
"grad_norm": 0.6768147945404053,
"learning_rate": 4.4287808453457125e-05,
"loss": 0.9398,
"step": 11700
},
{
"epoch": 0.1153075682806469,
"grad_norm": 1.0245041847229004,
"learning_rate": 4.423894453945761e-05,
"loss": 0.9934,
"step": 11800
},
{
"epoch": 0.11628475106268628,
"grad_norm": 0.6241583228111267,
"learning_rate": 4.41900806254581e-05,
"loss": 0.9697,
"step": 11900
},
{
"epoch": 0.11726193384472565,
"grad_norm": 0.4234873652458191,
"learning_rate": 4.414121671145859e-05,
"loss": 0.9723,
"step": 12000
},
{
"epoch": 0.11823911662676503,
"grad_norm": 0.3932545781135559,
"learning_rate": 4.409235279745908e-05,
"loss": 0.9826,
"step": 12100
},
{
"epoch": 0.11921629940880442,
"grad_norm": 1.5067880153656006,
"learning_rate": 4.4043488883459565e-05,
"loss": 0.9581,
"step": 12200
},
{
"epoch": 0.1201934821908438,
"grad_norm": 0.41707366704940796,
"learning_rate": 4.399462496946006e-05,
"loss": 0.9666,
"step": 12300
},
{
"epoch": 0.12117066497288317,
"grad_norm": 1.1278653144836426,
"learning_rate": 4.394576105546054e-05,
"loss": 0.9553,
"step": 12400
},
{
"epoch": 0.12214784775492256,
"grad_norm": 0.350543737411499,
"learning_rate": 4.3896897141461036e-05,
"loss": 0.9422,
"step": 12500
},
{
"epoch": 0.12312503053696194,
"grad_norm": 0.3775838315486908,
"learning_rate": 4.3848033227461524e-05,
"loss": 0.9626,
"step": 12600
},
{
"epoch": 0.12410221331900131,
"grad_norm": 0.8341017365455627,
"learning_rate": 4.379916931346201e-05,
"loss": 0.9289,
"step": 12700
},
{
"epoch": 0.1250793961010407,
"grad_norm": 0.805614173412323,
"learning_rate": 4.37503053994625e-05,
"loss": 0.9474,
"step": 12800
},
{
"epoch": 0.12605657888308008,
"grad_norm": 0.8439397215843201,
"learning_rate": 4.370144148546299e-05,
"loss": 0.9661,
"step": 12900
},
{
"epoch": 0.12703376166511945,
"grad_norm": 1.1272892951965332,
"learning_rate": 4.3652577571463476e-05,
"loss": 0.9514,
"step": 13000
},
{
"epoch": 0.12801094444715885,
"grad_norm": 0.6426375508308411,
"learning_rate": 4.3603713657463965e-05,
"loss": 0.9448,
"step": 13100
},
{
"epoch": 0.12898812722919822,
"grad_norm": 1.3205431699752808,
"learning_rate": 4.355484974346445e-05,
"loss": 0.9511,
"step": 13200
},
{
"epoch": 0.1299653100112376,
"grad_norm": 0.3671954870223999,
"learning_rate": 4.350598582946494e-05,
"loss": 0.9506,
"step": 13300
},
{
"epoch": 0.130942492793277,
"grad_norm": 0.7566332817077637,
"learning_rate": 4.345712191546543e-05,
"loss": 0.9363,
"step": 13400
},
{
"epoch": 0.13191967557531636,
"grad_norm": 0.8800159692764282,
"learning_rate": 4.340825800146592e-05,
"loss": 0.9388,
"step": 13500
},
{
"epoch": 0.13289685835735573,
"grad_norm": 0.7134628891944885,
"learning_rate": 4.335939408746641e-05,
"loss": 0.9162,
"step": 13600
},
{
"epoch": 0.13387404113939513,
"grad_norm": 0.5555543899536133,
"learning_rate": 4.331053017346689e-05,
"loss": 0.9366,
"step": 13700
},
{
"epoch": 0.1348512239214345,
"grad_norm": 0.4485512375831604,
"learning_rate": 4.326166625946739e-05,
"loss": 0.9286,
"step": 13800
},
{
"epoch": 0.13582840670347388,
"grad_norm": 0.8888948559761047,
"learning_rate": 4.3212802345467876e-05,
"loss": 0.943,
"step": 13900
},
{
"epoch": 0.13680558948551327,
"grad_norm": 0.6719749569892883,
"learning_rate": 4.3163938431468364e-05,
"loss": 0.9217,
"step": 14000
},
{
"epoch": 0.13778277226755264,
"grad_norm": 0.695377767086029,
"learning_rate": 4.311507451746885e-05,
"loss": 0.9093,
"step": 14100
},
{
"epoch": 0.13875995504959202,
"grad_norm": 0.5966312885284424,
"learning_rate": 4.306621060346934e-05,
"loss": 0.9195,
"step": 14200
},
{
"epoch": 0.13973713783163141,
"grad_norm": 0.8073310256004333,
"learning_rate": 4.301734668946983e-05,
"loss": 0.9309,
"step": 14300
},
{
"epoch": 0.14071432061367078,
"grad_norm": 0.6303800940513611,
"learning_rate": 4.2968482775470316e-05,
"loss": 0.9458,
"step": 14400
},
{
"epoch": 0.14169150339571016,
"grad_norm": 0.7043970823287964,
"learning_rate": 4.2919618861470804e-05,
"loss": 0.9132,
"step": 14500
},
{
"epoch": 0.14266868617774955,
"grad_norm": 0.9100736379623413,
"learning_rate": 4.287075494747129e-05,
"loss": 0.9296,
"step": 14600
},
{
"epoch": 0.14364586895978892,
"grad_norm": 0.787862241268158,
"learning_rate": 4.282189103347179e-05,
"loss": 0.9643,
"step": 14700
},
{
"epoch": 0.1446230517418283,
"grad_norm": 0.8169028162956238,
"learning_rate": 4.277302711947227e-05,
"loss": 0.9244,
"step": 14800
},
{
"epoch": 0.1456002345238677,
"grad_norm": 0.9544184803962708,
"learning_rate": 4.272416320547276e-05,
"loss": 0.918,
"step": 14900
},
{
"epoch": 0.14657741730590707,
"grad_norm": 0.5325574278831482,
"learning_rate": 4.2675299291473245e-05,
"loss": 0.9273,
"step": 15000
},
{
"epoch": 0.14755460008794646,
"grad_norm": 1.1403323411941528,
"learning_rate": 4.262643537747374e-05,
"loss": 0.9095,
"step": 15100
},
{
"epoch": 0.14853178286998583,
"grad_norm": 1.0411937236785889,
"learning_rate": 4.257757146347423e-05,
"loss": 0.8967,
"step": 15200
},
{
"epoch": 0.1495089656520252,
"grad_norm": 0.630393922328949,
"learning_rate": 4.2528707549474715e-05,
"loss": 0.8883,
"step": 15300
},
{
"epoch": 0.1504861484340646,
"grad_norm": 0.9445775747299194,
"learning_rate": 4.2479843635475204e-05,
"loss": 0.9253,
"step": 15400
},
{
"epoch": 0.15146333121610397,
"grad_norm": 0.5689444541931152,
"learning_rate": 4.243097972147569e-05,
"loss": 0.8983,
"step": 15500
},
{
"epoch": 0.15244051399814335,
"grad_norm": 0.7726677656173706,
"learning_rate": 4.238211580747618e-05,
"loss": 0.9228,
"step": 15600
},
{
"epoch": 0.15341769678018274,
"grad_norm": 0.8260165452957153,
"learning_rate": 4.2333251893476675e-05,
"loss": 0.9202,
"step": 15700
},
{
"epoch": 0.15439487956222211,
"grad_norm": 0.4869302809238434,
"learning_rate": 4.2284387979477156e-05,
"loss": 0.9283,
"step": 15800
},
{
"epoch": 0.15537206234426149,
"grad_norm": 0.5768991708755493,
"learning_rate": 4.2235524065477644e-05,
"loss": 0.9233,
"step": 15900
},
{
"epoch": 0.15634924512630088,
"grad_norm": 0.8856435418128967,
"learning_rate": 4.218666015147814e-05,
"loss": 0.8825,
"step": 16000
},
{
"epoch": 0.15732642790834026,
"grad_norm": 0.5258185267448425,
"learning_rate": 4.213779623747862e-05,
"loss": 0.8834,
"step": 16100
},
{
"epoch": 0.15830361069037963,
"grad_norm": 0.8340526223182678,
"learning_rate": 4.2088932323479115e-05,
"loss": 0.8856,
"step": 16200
},
{
"epoch": 0.15928079347241902,
"grad_norm": 0.4123723804950714,
"learning_rate": 4.2040068409479596e-05,
"loss": 0.8957,
"step": 16300
},
{
"epoch": 0.1602579762544584,
"grad_norm": 0.8336274027824402,
"learning_rate": 4.199120449548009e-05,
"loss": 0.9053,
"step": 16400
},
{
"epoch": 0.16123515903649777,
"grad_norm": 0.7977516055107117,
"learning_rate": 4.194234058148058e-05,
"loss": 0.8698,
"step": 16500
},
{
"epoch": 0.16221234181853716,
"grad_norm": 0.5064985156059265,
"learning_rate": 4.189347666748107e-05,
"loss": 0.8945,
"step": 16600
},
{
"epoch": 0.16318952460057654,
"grad_norm": 0.8241267204284668,
"learning_rate": 4.1844612753481555e-05,
"loss": 0.8875,
"step": 16700
},
{
"epoch": 0.1641667073826159,
"grad_norm": 0.7517113089561462,
"learning_rate": 4.179574883948204e-05,
"loss": 0.8845,
"step": 16800
},
{
"epoch": 0.1651438901646553,
"grad_norm": 0.6297169923782349,
"learning_rate": 4.174688492548253e-05,
"loss": 0.9303,
"step": 16900
},
{
"epoch": 0.16612107294669468,
"grad_norm": 0.5828490257263184,
"learning_rate": 4.1698021011483026e-05,
"loss": 0.8654,
"step": 17000
},
{
"epoch": 0.16709825572873405,
"grad_norm": 0.3038561940193176,
"learning_rate": 4.164915709748351e-05,
"loss": 0.8933,
"step": 17100
},
{
"epoch": 0.16807543851077344,
"grad_norm": 0.8928827047348022,
"learning_rate": 4.1600293183484e-05,
"loss": 0.8509,
"step": 17200
},
{
"epoch": 0.16905262129281282,
"grad_norm": 0.7055086493492126,
"learning_rate": 4.155142926948449e-05,
"loss": 0.8814,
"step": 17300
},
{
"epoch": 0.17002980407485221,
"grad_norm": 0.5377823710441589,
"learning_rate": 4.150256535548497e-05,
"loss": 0.888,
"step": 17400
},
{
"epoch": 0.17100698685689159,
"grad_norm": 0.6319778561592102,
"learning_rate": 4.1453701441485466e-05,
"loss": 0.8575,
"step": 17500
},
{
"epoch": 0.17198416963893096,
"grad_norm": 0.8756042122840881,
"learning_rate": 4.1404837527485954e-05,
"loss": 0.8805,
"step": 17600
},
{
"epoch": 0.17296135242097035,
"grad_norm": 0.5293178558349609,
"learning_rate": 4.135597361348644e-05,
"loss": 0.8471,
"step": 17700
},
{
"epoch": 0.17393853520300973,
"grad_norm": 0.9118284583091736,
"learning_rate": 4.130710969948693e-05,
"loss": 0.8426,
"step": 17800
},
{
"epoch": 0.1749157179850491,
"grad_norm": 1.0211195945739746,
"learning_rate": 4.125824578548742e-05,
"loss": 0.8877,
"step": 17900
},
{
"epoch": 0.1758929007670885,
"grad_norm": 1.4174985885620117,
"learning_rate": 4.120938187148791e-05,
"loss": 0.8731,
"step": 18000
},
{
"epoch": 0.17687008354912787,
"grad_norm": 0.8243415951728821,
"learning_rate": 4.1160517957488395e-05,
"loss": 0.8852,
"step": 18100
},
{
"epoch": 0.17784726633116724,
"grad_norm": 0.8385602235794067,
"learning_rate": 4.111165404348888e-05,
"loss": 0.8361,
"step": 18200
},
{
"epoch": 0.17882444911320663,
"grad_norm": 1.003968358039856,
"learning_rate": 4.106279012948938e-05,
"loss": 0.8738,
"step": 18300
},
{
"epoch": 0.179801631895246,
"grad_norm": 0.7428449988365173,
"learning_rate": 4.101392621548986e-05,
"loss": 0.8563,
"step": 18400
},
{
"epoch": 0.18077881467728538,
"grad_norm": 1.8963735103607178,
"learning_rate": 4.0965062301490354e-05,
"loss": 0.8428,
"step": 18500
},
{
"epoch": 0.18175599745932478,
"grad_norm": 0.6868895888328552,
"learning_rate": 4.091619838749084e-05,
"loss": 0.8727,
"step": 18600
},
{
"epoch": 0.18273318024136415,
"grad_norm": 1.8936256170272827,
"learning_rate": 4.086733447349133e-05,
"loss": 0.9211,
"step": 18700
},
{
"epoch": 0.18371036302340352,
"grad_norm": 1.004941463470459,
"learning_rate": 4.081847055949182e-05,
"loss": 0.8404,
"step": 18800
},
{
"epoch": 0.18468754580544292,
"grad_norm": 1.4084818363189697,
"learning_rate": 4.0769606645492306e-05,
"loss": 0.868,
"step": 18900
},
{
"epoch": 0.1856647285874823,
"grad_norm": 0.6459541320800781,
"learning_rate": 4.0720742731492794e-05,
"loss": 0.8583,
"step": 19000
},
{
"epoch": 0.18664191136952166,
"grad_norm": 0.7335548996925354,
"learning_rate": 4.067187881749328e-05,
"loss": 0.8622,
"step": 19100
},
{
"epoch": 0.18761909415156106,
"grad_norm": 0.6783348321914673,
"learning_rate": 4.062301490349377e-05,
"loss": 0.8572,
"step": 19200
},
{
"epoch": 0.18859627693360043,
"grad_norm": 0.6323419809341431,
"learning_rate": 4.057415098949426e-05,
"loss": 0.8763,
"step": 19300
},
{
"epoch": 0.1895734597156398,
"grad_norm": 0.963927686214447,
"learning_rate": 4.052528707549475e-05,
"loss": 0.8543,
"step": 19400
},
{
"epoch": 0.1905506424976792,
"grad_norm": 0.4785550832748413,
"learning_rate": 4.0476423161495234e-05,
"loss": 0.863,
"step": 19500
},
{
"epoch": 0.19152782527971857,
"grad_norm": 0.6358627080917358,
"learning_rate": 4.042755924749573e-05,
"loss": 0.8842,
"step": 19600
},
{
"epoch": 0.19250500806175797,
"grad_norm": 0.7857956886291504,
"learning_rate": 4.037869533349621e-05,
"loss": 0.8698,
"step": 19700
},
{
"epoch": 0.19348219084379734,
"grad_norm": 0.5225537419319153,
"learning_rate": 4.0329831419496705e-05,
"loss": 0.8842,
"step": 19800
},
{
"epoch": 0.1944593736258367,
"grad_norm": 0.582313597202301,
"learning_rate": 4.0280967505497194e-05,
"loss": 0.8506,
"step": 19900
},
{
"epoch": 0.1954365564078761,
"grad_norm": 0.7206740379333496,
"learning_rate": 4.023210359149768e-05,
"loss": 0.8529,
"step": 20000
},
{
"epoch": 0.19641373918991548,
"grad_norm": 0.45054760575294495,
"learning_rate": 4.018323967749817e-05,
"loss": 0.8564,
"step": 20100
},
{
"epoch": 0.19739092197195485,
"grad_norm": 0.9214595556259155,
"learning_rate": 4.013437576349866e-05,
"loss": 0.8443,
"step": 20200
},
{
"epoch": 0.19836810475399425,
"grad_norm": 0.9843263626098633,
"learning_rate": 4.0085511849499146e-05,
"loss": 0.856,
"step": 20300
},
{
"epoch": 0.19934528753603362,
"grad_norm": 0.6508098840713501,
"learning_rate": 4.0036647935499634e-05,
"loss": 0.8532,
"step": 20400
},
{
"epoch": 0.200322470318073,
"grad_norm": 0.8091655969619751,
"learning_rate": 3.998778402150012e-05,
"loss": 0.8691,
"step": 20500
},
{
"epoch": 0.20129965310011239,
"grad_norm": 0.8139657378196716,
"learning_rate": 3.993892010750061e-05,
"loss": 0.8608,
"step": 20600
},
{
"epoch": 0.20227683588215176,
"grad_norm": 0.628423273563385,
"learning_rate": 3.9890056193501105e-05,
"loss": 0.8369,
"step": 20700
},
{
"epoch": 0.20325401866419113,
"grad_norm": 1.737331748008728,
"learning_rate": 3.9841192279501586e-05,
"loss": 0.8363,
"step": 20800
},
{
"epoch": 0.20423120144623053,
"grad_norm": 1.036280870437622,
"learning_rate": 3.979232836550208e-05,
"loss": 0.8387,
"step": 20900
},
{
"epoch": 0.2052083842282699,
"grad_norm": 0.35834863781929016,
"learning_rate": 3.974346445150256e-05,
"loss": 0.8565,
"step": 21000
},
{
"epoch": 0.20618556701030927,
"grad_norm": 0.7657331824302673,
"learning_rate": 3.969460053750306e-05,
"loss": 0.8654,
"step": 21100
},
{
"epoch": 0.20716274979234867,
"grad_norm": 1.077300786972046,
"learning_rate": 3.9645736623503545e-05,
"loss": 0.8218,
"step": 21200
},
{
"epoch": 0.20813993257438804,
"grad_norm": 0.5806353688240051,
"learning_rate": 3.959687270950403e-05,
"loss": 0.8375,
"step": 21300
},
{
"epoch": 0.2091171153564274,
"grad_norm": 0.3875705599784851,
"learning_rate": 3.954800879550452e-05,
"loss": 0.8342,
"step": 21400
},
{
"epoch": 0.2100942981384668,
"grad_norm": 0.7829961180686951,
"learning_rate": 3.949914488150501e-05,
"loss": 0.832,
"step": 21500
},
{
"epoch": 0.21107148092050618,
"grad_norm": 1.9466382265090942,
"learning_rate": 3.94502809675055e-05,
"loss": 0.8118,
"step": 21600
},
{
"epoch": 0.21204866370254555,
"grad_norm": 0.6271357536315918,
"learning_rate": 3.940141705350599e-05,
"loss": 0.8436,
"step": 21700
},
{
"epoch": 0.21302584648458495,
"grad_norm": 1.320719838142395,
"learning_rate": 3.9352553139506474e-05,
"loss": 0.8586,
"step": 21800
},
{
"epoch": 0.21400302926662432,
"grad_norm": 0.6017069220542908,
"learning_rate": 3.930368922550697e-05,
"loss": 0.8242,
"step": 21900
},
{
"epoch": 0.21498021204866372,
"grad_norm": 0.8584203124046326,
"learning_rate": 3.9254825311507456e-05,
"loss": 0.815,
"step": 22000
},
{
"epoch": 0.2159573948307031,
"grad_norm": 0.623652458190918,
"learning_rate": 3.920596139750794e-05,
"loss": 0.812,
"step": 22100
},
{
"epoch": 0.21693457761274246,
"grad_norm": 0.6867117881774902,
"learning_rate": 3.915709748350843e-05,
"loss": 0.8141,
"step": 22200
},
{
"epoch": 0.21791176039478186,
"grad_norm": 0.6963294744491577,
"learning_rate": 3.910823356950892e-05,
"loss": 0.8227,
"step": 22300
},
{
"epoch": 0.21888894317682123,
"grad_norm": 0.6727440357208252,
"learning_rate": 3.905936965550941e-05,
"loss": 0.8285,
"step": 22400
},
{
"epoch": 0.2198661259588606,
"grad_norm": 1.261771559715271,
"learning_rate": 3.90105057415099e-05,
"loss": 0.8396,
"step": 22500
},
{
"epoch": 0.2208433087409,
"grad_norm": 0.9146804809570312,
"learning_rate": 3.8961641827510385e-05,
"loss": 0.8194,
"step": 22600
},
{
"epoch": 0.22182049152293937,
"grad_norm": 0.9350225329399109,
"learning_rate": 3.891277791351087e-05,
"loss": 0.8376,
"step": 22700
},
{
"epoch": 0.22279767430497874,
"grad_norm": 0.6317518353462219,
"learning_rate": 3.886391399951137e-05,
"loss": 0.8313,
"step": 22800
},
{
"epoch": 0.22377485708701814,
"grad_norm": 0.6716780662536621,
"learning_rate": 3.881505008551185e-05,
"loss": 0.8033,
"step": 22900
},
{
"epoch": 0.2247520398690575,
"grad_norm": 0.4494755268096924,
"learning_rate": 3.8766186171512344e-05,
"loss": 0.8047,
"step": 23000
},
{
"epoch": 0.22572922265109688,
"grad_norm": 0.5505642890930176,
"learning_rate": 3.8717322257512825e-05,
"loss": 0.8456,
"step": 23100
},
{
"epoch": 0.22670640543313628,
"grad_norm": 0.8866478800773621,
"learning_rate": 3.866845834351332e-05,
"loss": 0.8105,
"step": 23200
},
{
"epoch": 0.22768358821517565,
"grad_norm": 0.7525384426116943,
"learning_rate": 3.861959442951381e-05,
"loss": 0.8292,
"step": 23300
},
{
"epoch": 0.22866077099721502,
"grad_norm": 0.8182941675186157,
"learning_rate": 3.8570730515514296e-05,
"loss": 0.8392,
"step": 23400
},
{
"epoch": 0.22963795377925442,
"grad_norm": 0.6246720552444458,
"learning_rate": 3.8521866601514784e-05,
"loss": 0.8292,
"step": 23500
},
{
"epoch": 0.2306151365612938,
"grad_norm": 0.7931325435638428,
"learning_rate": 3.847300268751527e-05,
"loss": 0.83,
"step": 23600
},
{
"epoch": 0.23159231934333316,
"grad_norm": 0.4839908480644226,
"learning_rate": 3.842413877351576e-05,
"loss": 0.8544,
"step": 23700
},
{
"epoch": 0.23256950212537256,
"grad_norm": 0.694095253944397,
"learning_rate": 3.837527485951625e-05,
"loss": 0.8168,
"step": 23800
},
{
"epoch": 0.23354668490741193,
"grad_norm": 0.6341009140014648,
"learning_rate": 3.8326410945516736e-05,
"loss": 0.8007,
"step": 23900
},
{
"epoch": 0.2345238676894513,
"grad_norm": 0.6198739409446716,
"learning_rate": 3.8277547031517224e-05,
"loss": 0.8222,
"step": 24000
},
{
"epoch": 0.2355010504714907,
"grad_norm": 0.7246755361557007,
"learning_rate": 3.822868311751772e-05,
"loss": 0.8239,
"step": 24100
},
{
"epoch": 0.23647823325353007,
"grad_norm": 1.1782780885696411,
"learning_rate": 3.81798192035182e-05,
"loss": 0.8069,
"step": 24200
},
{
"epoch": 0.23745541603556947,
"grad_norm": 0.7902185320854187,
"learning_rate": 3.8130955289518695e-05,
"loss": 0.8283,
"step": 24300
},
{
"epoch": 0.23843259881760884,
"grad_norm": 1.605393648147583,
"learning_rate": 3.808209137551918e-05,
"loss": 0.7758,
"step": 24400
},
{
"epoch": 0.2394097815996482,
"grad_norm": 0.5076558589935303,
"learning_rate": 3.803322746151967e-05,
"loss": 0.8178,
"step": 24500
},
{
"epoch": 0.2403869643816876,
"grad_norm": 0.777646005153656,
"learning_rate": 3.798436354752016e-05,
"loss": 0.8074,
"step": 24600
},
{
"epoch": 0.24136414716372698,
"grad_norm": 1.3850637674331665,
"learning_rate": 3.793549963352065e-05,
"loss": 0.8058,
"step": 24700
},
{
"epoch": 0.24234132994576635,
"grad_norm": 0.6476046442985535,
"learning_rate": 3.7886635719521136e-05,
"loss": 0.7967,
"step": 24800
},
{
"epoch": 0.24331851272780575,
"grad_norm": 0.5768633484840393,
"learning_rate": 3.7837771805521624e-05,
"loss": 0.8269,
"step": 24900
},
{
"epoch": 0.24429569550984512,
"grad_norm": 0.7800481915473938,
"learning_rate": 3.778890789152211e-05,
"loss": 0.8237,
"step": 25000
},
{
"epoch": 0.2452728782918845,
"grad_norm": 0.591273844242096,
"learning_rate": 3.77400439775226e-05,
"loss": 0.8045,
"step": 25100
},
{
"epoch": 0.2462500610739239,
"grad_norm": 0.5170730352401733,
"learning_rate": 3.769118006352309e-05,
"loss": 0.818,
"step": 25200
},
{
"epoch": 0.24722724385596326,
"grad_norm": 0.7280113101005554,
"learning_rate": 3.7642316149523576e-05,
"loss": 0.806,
"step": 25300
},
{
"epoch": 0.24820442663800263,
"grad_norm": 0.48092082142829895,
"learning_rate": 3.759345223552407e-05,
"loss": 0.804,
"step": 25400
},
{
"epoch": 0.24918160942004203,
"grad_norm": 0.8031238913536072,
"learning_rate": 3.754458832152455e-05,
"loss": 0.8031,
"step": 25500
},
{
"epoch": 0.2501587922020814,
"grad_norm": 0.5290892720222473,
"learning_rate": 3.749572440752505e-05,
"loss": 0.816,
"step": 25600
},
{
"epoch": 0.25113597498412077,
"grad_norm": 1.850685477256775,
"learning_rate": 3.7446860493525535e-05,
"loss": 0.8241,
"step": 25700
},
{
"epoch": 0.25211315776616017,
"grad_norm": 0.9196923971176147,
"learning_rate": 3.739799657952602e-05,
"loss": 0.8115,
"step": 25800
},
{
"epoch": 0.25309034054819957,
"grad_norm": 0.8779144883155823,
"learning_rate": 3.734913266552651e-05,
"loss": 0.8065,
"step": 25900
},
{
"epoch": 0.2540675233302389,
"grad_norm": 0.6696827411651611,
"learning_rate": 3.7300268751527e-05,
"loss": 0.7827,
"step": 26000
},
{
"epoch": 0.2550447061122783,
"grad_norm": 0.5037100315093994,
"learning_rate": 3.725140483752749e-05,
"loss": 0.7955,
"step": 26100
},
{
"epoch": 0.2560218888943177,
"grad_norm": 1.4716683626174927,
"learning_rate": 3.7202540923527975e-05,
"loss": 0.8076,
"step": 26200
},
{
"epoch": 0.25699907167635705,
"grad_norm": 0.7515909671783447,
"learning_rate": 3.7153677009528463e-05,
"loss": 0.7645,
"step": 26300
},
{
"epoch": 0.25797625445839645,
"grad_norm": 0.8641912341117859,
"learning_rate": 3.710481309552896e-05,
"loss": 0.7794,
"step": 26400
},
{
"epoch": 0.25895343724043585,
"grad_norm": 0.7385029792785645,
"learning_rate": 3.705594918152944e-05,
"loss": 0.8047,
"step": 26500
},
{
"epoch": 0.2599306200224752,
"grad_norm": 1.194313645362854,
"learning_rate": 3.700708526752993e-05,
"loss": 0.7973,
"step": 26600
},
{
"epoch": 0.2609078028045146,
"grad_norm": 0.8573377728462219,
"learning_rate": 3.695822135353042e-05,
"loss": 0.8054,
"step": 26700
},
{
"epoch": 0.261884985586554,
"grad_norm": 0.7428358793258667,
"learning_rate": 3.6909357439530904e-05,
"loss": 0.8194,
"step": 26800
},
{
"epoch": 0.26286216836859333,
"grad_norm": 1.1976490020751953,
"learning_rate": 3.68604935255314e-05,
"loss": 0.7745,
"step": 26900
},
{
"epoch": 0.26383935115063273,
"grad_norm": 0.8391226530075073,
"learning_rate": 3.681162961153189e-05,
"loss": 0.7981,
"step": 27000
},
{
"epoch": 0.2648165339326721,
"grad_norm": 1.0753370523452759,
"learning_rate": 3.6762765697532375e-05,
"loss": 0.8018,
"step": 27100
},
{
"epoch": 0.26579371671471147,
"grad_norm": 0.8495202660560608,
"learning_rate": 3.671390178353286e-05,
"loss": 0.7894,
"step": 27200
},
{
"epoch": 0.26677089949675087,
"grad_norm": 2.3333170413970947,
"learning_rate": 3.666503786953335e-05,
"loss": 0.7892,
"step": 27300
},
{
"epoch": 0.26774808227879027,
"grad_norm": 0.7213625311851501,
"learning_rate": 3.661617395553384e-05,
"loss": 0.7902,
"step": 27400
},
{
"epoch": 0.2687252650608296,
"grad_norm": 1.045614242553711,
"learning_rate": 3.6567310041534334e-05,
"loss": 0.7719,
"step": 27500
},
{
"epoch": 0.269702447842869,
"grad_norm": 0.42100274562835693,
"learning_rate": 3.6518446127534815e-05,
"loss": 0.7705,
"step": 27600
},
{
"epoch": 0.2706796306249084,
"grad_norm": 0.5944122076034546,
"learning_rate": 3.646958221353531e-05,
"loss": 0.7717,
"step": 27700
},
{
"epoch": 0.27165681340694775,
"grad_norm": 0.7398585677146912,
"learning_rate": 3.642071829953579e-05,
"loss": 0.7896,
"step": 27800
},
{
"epoch": 0.27263399618898715,
"grad_norm": 0.8064782023429871,
"learning_rate": 3.6371854385536286e-05,
"loss": 0.7917,
"step": 27900
},
{
"epoch": 0.27361117897102655,
"grad_norm": 0.6715266108512878,
"learning_rate": 3.6322990471536774e-05,
"loss": 0.7771,
"step": 28000
},
{
"epoch": 0.2745883617530659,
"grad_norm": 1.1130329370498657,
"learning_rate": 3.6274126557537255e-05,
"loss": 0.7476,
"step": 28100
},
{
"epoch": 0.2755655445351053,
"grad_norm": 0.7601907253265381,
"learning_rate": 3.622526264353775e-05,
"loss": 0.7745,
"step": 28200
},
{
"epoch": 0.2765427273171447,
"grad_norm": 0.8511783480644226,
"learning_rate": 3.617639872953824e-05,
"loss": 0.7737,
"step": 28300
},
{
"epoch": 0.27751991009918403,
"grad_norm": 0.8136917948722839,
"learning_rate": 3.6127534815538726e-05,
"loss": 0.7905,
"step": 28400
},
{
"epoch": 0.27849709288122343,
"grad_norm": 0.5580685138702393,
"learning_rate": 3.6078670901539214e-05,
"loss": 0.7957,
"step": 28500
},
{
"epoch": 0.27947427566326283,
"grad_norm": 0.750845730304718,
"learning_rate": 3.60298069875397e-05,
"loss": 0.7396,
"step": 28600
},
{
"epoch": 0.28045145844530217,
"grad_norm": 0.9611383080482483,
"learning_rate": 3.598094307354019e-05,
"loss": 0.774,
"step": 28700
},
{
"epoch": 0.28142864122734157,
"grad_norm": 0.6622794270515442,
"learning_rate": 3.5932079159540685e-05,
"loss": 0.7993,
"step": 28800
},
{
"epoch": 0.28240582400938097,
"grad_norm": 0.4816977381706238,
"learning_rate": 3.588321524554117e-05,
"loss": 0.7868,
"step": 28900
},
{
"epoch": 0.2833830067914203,
"grad_norm": 0.6779691576957703,
"learning_rate": 3.583435133154166e-05,
"loss": 0.7838,
"step": 29000
},
{
"epoch": 0.2843601895734597,
"grad_norm": 0.9714117646217346,
"learning_rate": 3.578548741754214e-05,
"loss": 0.7686,
"step": 29100
},
{
"epoch": 0.2853373723554991,
"grad_norm": 0.7163410186767578,
"learning_rate": 3.573662350354264e-05,
"loss": 0.7747,
"step": 29200
},
{
"epoch": 0.28631455513753845,
"grad_norm": 0.7338354587554932,
"learning_rate": 3.5687759589543126e-05,
"loss": 0.7703,
"step": 29300
},
{
"epoch": 0.28729173791957785,
"grad_norm": 0.765074610710144,
"learning_rate": 3.5638895675543614e-05,
"loss": 0.7811,
"step": 29400
},
{
"epoch": 0.28826892070161725,
"grad_norm": 0.6714346408843994,
"learning_rate": 3.55900317615441e-05,
"loss": 0.7971,
"step": 29500
},
{
"epoch": 0.2892461034836566,
"grad_norm": 0.6784923672676086,
"learning_rate": 3.554116784754459e-05,
"loss": 0.7704,
"step": 29600
},
{
"epoch": 0.290223286265696,
"grad_norm": 0.6446245312690735,
"learning_rate": 3.549230393354508e-05,
"loss": 0.7843,
"step": 29700
},
{
"epoch": 0.2912004690477354,
"grad_norm": 0.9739934206008911,
"learning_rate": 3.5443440019545566e-05,
"loss": 0.7423,
"step": 29800
},
{
"epoch": 0.2921776518297748,
"grad_norm": 0.2898177206516266,
"learning_rate": 3.5394576105546054e-05,
"loss": 0.7322,
"step": 29900
},
{
"epoch": 0.29315483461181413,
"grad_norm": 0.720974862575531,
"learning_rate": 3.534571219154654e-05,
"loss": 0.7593,
"step": 30000
},
{
"epoch": 0.29413201739385353,
"grad_norm": 0.4672446548938751,
"learning_rate": 3.529684827754704e-05,
"loss": 0.7422,
"step": 30100
},
{
"epoch": 0.2951092001758929,
"grad_norm": 0.7546716332435608,
"learning_rate": 3.524798436354752e-05,
"loss": 0.7788,
"step": 30200
},
{
"epoch": 0.29608638295793227,
"grad_norm": 0.6265705823898315,
"learning_rate": 3.519912044954801e-05,
"loss": 0.745,
"step": 30300
},
{
"epoch": 0.29706356573997167,
"grad_norm": 1.092965841293335,
"learning_rate": 3.51502565355485e-05,
"loss": 0.789,
"step": 30400
},
{
"epoch": 0.29804074852201107,
"grad_norm": 0.7648272514343262,
"learning_rate": 3.510139262154899e-05,
"loss": 0.758,
"step": 30500
},
{
"epoch": 0.2990179313040504,
"grad_norm": 0.785746157169342,
"learning_rate": 3.505252870754948e-05,
"loss": 0.7744,
"step": 30600
},
{
"epoch": 0.2999951140860898,
"grad_norm": 0.8007264733314514,
"learning_rate": 3.5003664793549965e-05,
"loss": 0.7696,
"step": 30700
},
{
"epoch": 0.3009722968681292,
"grad_norm": 1.1369248628616333,
"learning_rate": 3.4954800879550453e-05,
"loss": 0.7667,
"step": 30800
},
{
"epoch": 0.30194947965016855,
"grad_norm": 0.6251523494720459,
"learning_rate": 3.490593696555095e-05,
"loss": 0.7686,
"step": 30900
},
{
"epoch": 0.30292666243220795,
"grad_norm": 1.1552335023880005,
"learning_rate": 3.485707305155143e-05,
"loss": 0.7693,
"step": 31000
},
{
"epoch": 0.30390384521424735,
"grad_norm": 0.9136368036270142,
"learning_rate": 3.480820913755192e-05,
"loss": 0.7898,
"step": 31100
},
{
"epoch": 0.3048810279962867,
"grad_norm": 0.4203650951385498,
"learning_rate": 3.4759345223552406e-05,
"loss": 0.7541,
"step": 31200
},
{
"epoch": 0.3058582107783261,
"grad_norm": 0.671546995639801,
"learning_rate": 3.4710481309552894e-05,
"loss": 0.735,
"step": 31300
},
{
"epoch": 0.3068353935603655,
"grad_norm": 0.6711509227752686,
"learning_rate": 3.466161739555339e-05,
"loss": 0.7481,
"step": 31400
},
{
"epoch": 0.30781257634240483,
"grad_norm": 0.7787076234817505,
"learning_rate": 3.461275348155387e-05,
"loss": 0.7701,
"step": 31500
},
{
"epoch": 0.30878975912444423,
"grad_norm": 0.5270808935165405,
"learning_rate": 3.4563889567554365e-05,
"loss": 0.7166,
"step": 31600
},
{
"epoch": 0.30976694190648363,
"grad_norm": 0.7732633352279663,
"learning_rate": 3.451502565355485e-05,
"loss": 0.7857,
"step": 31700
},
{
"epoch": 0.31074412468852297,
"grad_norm": 0.6347182989120483,
"learning_rate": 3.446616173955534e-05,
"loss": 0.7384,
"step": 31800
},
{
"epoch": 0.31172130747056237,
"grad_norm": 0.9557164311408997,
"learning_rate": 3.441729782555583e-05,
"loss": 0.755,
"step": 31900
},
{
"epoch": 0.31269849025260177,
"grad_norm": 0.8120887279510498,
"learning_rate": 3.436843391155632e-05,
"loss": 0.7356,
"step": 32000
},
{
"epoch": 0.3136756730346411,
"grad_norm": 0.6804450750350952,
"learning_rate": 3.4319569997556805e-05,
"loss": 0.785,
"step": 32100
},
{
"epoch": 0.3146528558166805,
"grad_norm": 0.7511081695556641,
"learning_rate": 3.42707060835573e-05,
"loss": 0.7427,
"step": 32200
},
{
"epoch": 0.3156300385987199,
"grad_norm": 0.8396822214126587,
"learning_rate": 3.422184216955778e-05,
"loss": 0.7801,
"step": 32300
},
{
"epoch": 0.31660722138075925,
"grad_norm": 1.0063520669937134,
"learning_rate": 3.4172978255558276e-05,
"loss": 0.7638,
"step": 32400
},
{
"epoch": 0.31758440416279865,
"grad_norm": 1.349414587020874,
"learning_rate": 3.412411434155876e-05,
"loss": 0.7522,
"step": 32500
},
{
"epoch": 0.31856158694483805,
"grad_norm": 0.8259103298187256,
"learning_rate": 3.4075250427559245e-05,
"loss": 0.7351,
"step": 32600
},
{
"epoch": 0.3195387697268774,
"grad_norm": 0.4894813597202301,
"learning_rate": 3.402638651355974e-05,
"loss": 0.7593,
"step": 32700
},
{
"epoch": 0.3205159525089168,
"grad_norm": 0.6558930277824402,
"learning_rate": 3.397752259956022e-05,
"loss": 0.7496,
"step": 32800
},
{
"epoch": 0.3214931352909562,
"grad_norm": 1.2009482383728027,
"learning_rate": 3.3928658685560716e-05,
"loss": 0.7379,
"step": 32900
},
{
"epoch": 0.32247031807299553,
"grad_norm": 0.8621765375137329,
"learning_rate": 3.3879794771561204e-05,
"loss": 0.7381,
"step": 33000
},
{
"epoch": 0.32344750085503493,
"grad_norm": 0.5097255706787109,
"learning_rate": 3.383093085756169e-05,
"loss": 0.7567,
"step": 33100
},
{
"epoch": 0.32442468363707433,
"grad_norm": 0.48458051681518555,
"learning_rate": 3.378206694356218e-05,
"loss": 0.7649,
"step": 33200
},
{
"epoch": 0.3254018664191137,
"grad_norm": 0.7467001676559448,
"learning_rate": 3.373320302956267e-05,
"loss": 0.7612,
"step": 33300
},
{
"epoch": 0.32637904920115307,
"grad_norm": 1.1591566801071167,
"learning_rate": 3.368433911556316e-05,
"loss": 0.7394,
"step": 33400
},
{
"epoch": 0.32735623198319247,
"grad_norm": 0.9665714502334595,
"learning_rate": 3.363547520156365e-05,
"loss": 0.7472,
"step": 33500
},
{
"epoch": 0.3283334147652318,
"grad_norm": 0.5714060664176941,
"learning_rate": 3.358661128756413e-05,
"loss": 0.7385,
"step": 33600
},
{
"epoch": 0.3293105975472712,
"grad_norm": 0.8278976082801819,
"learning_rate": 3.353774737356463e-05,
"loss": 0.724,
"step": 33700
},
{
"epoch": 0.3302877803293106,
"grad_norm": 0.9210988283157349,
"learning_rate": 3.3488883459565116e-05,
"loss": 0.7542,
"step": 33800
},
{
"epoch": 0.33126496311134995,
"grad_norm": 1.0610690116882324,
"learning_rate": 3.3440019545565604e-05,
"loss": 0.7284,
"step": 33900
},
{
"epoch": 0.33224214589338935,
"grad_norm": 0.6521257162094116,
"learning_rate": 3.339115563156609e-05,
"loss": 0.755,
"step": 34000
},
{
"epoch": 0.33321932867542875,
"grad_norm": 1.0515367984771729,
"learning_rate": 3.334229171756657e-05,
"loss": 0.7423,
"step": 34100
},
{
"epoch": 0.3341965114574681,
"grad_norm": 0.8415219783782959,
"learning_rate": 3.329342780356707e-05,
"loss": 0.716,
"step": 34200
},
{
"epoch": 0.3351736942395075,
"grad_norm": 0.5018264651298523,
"learning_rate": 3.3244563889567556e-05,
"loss": 0.7556,
"step": 34300
},
{
"epoch": 0.3361508770215469,
"grad_norm": 0.6532925963401794,
"learning_rate": 3.3195699975568044e-05,
"loss": 0.7335,
"step": 34400
},
{
"epoch": 0.3371280598035863,
"grad_norm": 0.6794486045837402,
"learning_rate": 3.314683606156853e-05,
"loss": 0.7466,
"step": 34500
},
{
"epoch": 0.33810524258562563,
"grad_norm": 0.7372865080833435,
"learning_rate": 3.309797214756902e-05,
"loss": 0.727,
"step": 34600
},
{
"epoch": 0.33908242536766503,
"grad_norm": 0.6354756355285645,
"learning_rate": 3.304910823356951e-05,
"loss": 0.725,
"step": 34700
},
{
"epoch": 0.34005960814970443,
"grad_norm": 0.7180996537208557,
"learning_rate": 3.300024431957e-05,
"loss": 0.7049,
"step": 34800
},
{
"epoch": 0.34103679093174377,
"grad_norm": 1.3991978168487549,
"learning_rate": 3.2951380405570484e-05,
"loss": 0.7251,
"step": 34900
},
{
"epoch": 0.34201397371378317,
"grad_norm": 0.5680633783340454,
"learning_rate": 3.290251649157098e-05,
"loss": 0.744,
"step": 35000
},
{
"epoch": 0.34299115649582257,
"grad_norm": 0.5309197306632996,
"learning_rate": 3.285365257757147e-05,
"loss": 0.7277,
"step": 35100
},
{
"epoch": 0.3439683392778619,
"grad_norm": 1.449625849723816,
"learning_rate": 3.2804788663571955e-05,
"loss": 0.7127,
"step": 35200
},
{
"epoch": 0.3449455220599013,
"grad_norm": 0.6244996190071106,
"learning_rate": 3.2755924749572443e-05,
"loss": 0.6992,
"step": 35300
},
{
"epoch": 0.3459227048419407,
"grad_norm": 1.037988305091858,
"learning_rate": 3.270706083557293e-05,
"loss": 0.7095,
"step": 35400
},
{
"epoch": 0.34689988762398005,
"grad_norm": 1.2503726482391357,
"learning_rate": 3.265819692157342e-05,
"loss": 0.7264,
"step": 35500
},
{
"epoch": 0.34787707040601945,
"grad_norm": 1.2136774063110352,
"learning_rate": 3.260933300757391e-05,
"loss": 0.7418,
"step": 35600
},
{
"epoch": 0.34885425318805885,
"grad_norm": 0.9328750371932983,
"learning_rate": 3.2560469093574396e-05,
"loss": 0.7509,
"step": 35700
},
{
"epoch": 0.3498314359700982,
"grad_norm": 0.5122935771942139,
"learning_rate": 3.2511605179574884e-05,
"loss": 0.7114,
"step": 35800
},
{
"epoch": 0.3508086187521376,
"grad_norm": 1.153583288192749,
"learning_rate": 3.246274126557537e-05,
"loss": 0.7316,
"step": 35900
},
{
"epoch": 0.351785801534177,
"grad_norm": 0.7405250668525696,
"learning_rate": 3.241387735157586e-05,
"loss": 0.7404,
"step": 36000
},
{
"epoch": 0.35276298431621633,
"grad_norm": 0.607565701007843,
"learning_rate": 3.2365013437576355e-05,
"loss": 0.7196,
"step": 36100
},
{
"epoch": 0.35374016709825573,
"grad_norm": 1.4975577592849731,
"learning_rate": 3.2316149523576836e-05,
"loss": 0.703,
"step": 36200
},
{
"epoch": 0.35471734988029513,
"grad_norm": 0.9088447093963623,
"learning_rate": 3.226728560957733e-05,
"loss": 0.7203,
"step": 36300
},
{
"epoch": 0.3556945326623345,
"grad_norm": 0.9132680892944336,
"learning_rate": 3.221842169557782e-05,
"loss": 0.7248,
"step": 36400
},
{
"epoch": 0.35667171544437387,
"grad_norm": 0.7861882448196411,
"learning_rate": 3.216955778157831e-05,
"loss": 0.7118,
"step": 36500
},
{
"epoch": 0.35764889822641327,
"grad_norm": 1.2251768112182617,
"learning_rate": 3.2120693867578795e-05,
"loss": 0.7304,
"step": 36600
},
{
"epoch": 0.3586260810084526,
"grad_norm": 1.1924370527267456,
"learning_rate": 3.207182995357928e-05,
"loss": 0.7394,
"step": 36700
},
{
"epoch": 0.359603263790492,
"grad_norm": 0.7275030016899109,
"learning_rate": 3.202296603957977e-05,
"loss": 0.7399,
"step": 36800
},
{
"epoch": 0.3605804465725314,
"grad_norm": 0.7406324148178101,
"learning_rate": 3.1974102125580266e-05,
"loss": 0.7432,
"step": 36900
},
{
"epoch": 0.36155762935457075,
"grad_norm": 1.0701793432235718,
"learning_rate": 3.192523821158075e-05,
"loss": 0.7099,
"step": 37000
},
{
"epoch": 0.36253481213661015,
"grad_norm": 0.7077426314353943,
"learning_rate": 3.1876374297581235e-05,
"loss": 0.7127,
"step": 37100
},
{
"epoch": 0.36351199491864955,
"grad_norm": 0.5806621313095093,
"learning_rate": 3.1827510383581723e-05,
"loss": 0.7002,
"step": 37200
},
{
"epoch": 0.3644891777006889,
"grad_norm": 1.1311944723129272,
"learning_rate": 3.177864646958221e-05,
"loss": 0.6876,
"step": 37300
},
{
"epoch": 0.3654663604827283,
"grad_norm": 0.9112023711204529,
"learning_rate": 3.1729782555582706e-05,
"loss": 0.7169,
"step": 37400
},
{
"epoch": 0.3664435432647677,
"grad_norm": 0.5986848473548889,
"learning_rate": 3.168091864158319e-05,
"loss": 0.7294,
"step": 37500
},
{
"epoch": 0.36742072604680703,
"grad_norm": 1.297155737876892,
"learning_rate": 3.163205472758368e-05,
"loss": 0.7061,
"step": 37600
},
{
"epoch": 0.36839790882884643,
"grad_norm": 0.6597927808761597,
"learning_rate": 3.158319081358417e-05,
"loss": 0.7166,
"step": 37700
},
{
"epoch": 0.36937509161088583,
"grad_norm": 0.36105087399482727,
"learning_rate": 3.153432689958466e-05,
"loss": 0.7017,
"step": 37800
},
{
"epoch": 0.3703522743929252,
"grad_norm": 0.5487505197525024,
"learning_rate": 3.148546298558515e-05,
"loss": 0.7081,
"step": 37900
},
{
"epoch": 0.3713294571749646,
"grad_norm": 1.5384310483932495,
"learning_rate": 3.1436599071585635e-05,
"loss": 0.7064,
"step": 38000
},
{
"epoch": 0.37230663995700397,
"grad_norm": 1.0113205909729004,
"learning_rate": 3.138773515758612e-05,
"loss": 0.7197,
"step": 38100
},
{
"epoch": 0.3732838227390433,
"grad_norm": 1.4755492210388184,
"learning_rate": 3.133887124358662e-05,
"loss": 0.755,
"step": 38200
},
{
"epoch": 0.3742610055210827,
"grad_norm": 0.7554188370704651,
"learning_rate": 3.12900073295871e-05,
"loss": 0.7083,
"step": 38300
},
{
"epoch": 0.3752381883031221,
"grad_norm": 0.7589747905731201,
"learning_rate": 3.1241143415587594e-05,
"loss": 0.6917,
"step": 38400
},
{
"epoch": 0.37621537108516145,
"grad_norm": 0.485612690448761,
"learning_rate": 3.119227950158808e-05,
"loss": 0.7429,
"step": 38500
},
{
"epoch": 0.37719255386720085,
"grad_norm": 0.5043421983718872,
"learning_rate": 3.114341558758856e-05,
"loss": 0.7217,
"step": 38600
},
{
"epoch": 0.37816973664924025,
"grad_norm": 1.6078003644943237,
"learning_rate": 3.109455167358906e-05,
"loss": 0.7019,
"step": 38700
},
{
"epoch": 0.3791469194312796,
"grad_norm": 0.3607342839241028,
"learning_rate": 3.104568775958954e-05,
"loss": 0.772,
"step": 38800
},
{
"epoch": 0.380124102213319,
"grad_norm": 1.002525806427002,
"learning_rate": 3.0996823845590034e-05,
"loss": 0.7213,
"step": 38900
},
{
"epoch": 0.3811012849953584,
"grad_norm": 0.7605811357498169,
"learning_rate": 3.094795993159052e-05,
"loss": 0.7,
"step": 39000
},
{
"epoch": 0.3820784677773978,
"grad_norm": 2.388939619064331,
"learning_rate": 3.089909601759101e-05,
"loss": 0.7307,
"step": 39100
},
{
"epoch": 0.38305565055943713,
"grad_norm": 0.824883222579956,
"learning_rate": 3.08502321035915e-05,
"loss": 0.7255,
"step": 39200
},
{
"epoch": 0.38403283334147653,
"grad_norm": 0.6755787134170532,
"learning_rate": 3.0801368189591986e-05,
"loss": 0.7013,
"step": 39300
},
{
"epoch": 0.38501001612351593,
"grad_norm": 0.580859899520874,
"learning_rate": 3.0752504275592474e-05,
"loss": 0.7357,
"step": 39400
},
{
"epoch": 0.3859871989055553,
"grad_norm": 0.6988548636436462,
"learning_rate": 3.070364036159297e-05,
"loss": 0.6902,
"step": 39500
},
{
"epoch": 0.38696438168759467,
"grad_norm": 0.5997043251991272,
"learning_rate": 3.065477644759345e-05,
"loss": 0.7093,
"step": 39600
},
{
"epoch": 0.38794156446963407,
"grad_norm": 0.7906262874603271,
"learning_rate": 3.0605912533593945e-05,
"loss": 0.7376,
"step": 39700
},
{
"epoch": 0.3889187472516734,
"grad_norm": 0.7436035871505737,
"learning_rate": 3.0557048619594433e-05,
"loss": 0.7159,
"step": 39800
},
{
"epoch": 0.3898959300337128,
"grad_norm": 0.6913009285926819,
"learning_rate": 3.050818470559492e-05,
"loss": 0.7267,
"step": 39900
},
{
"epoch": 0.3908731128157522,
"grad_norm": 1.0030348300933838,
"learning_rate": 3.045932079159541e-05,
"loss": 0.7186,
"step": 40000
},
{
"epoch": 0.39185029559779155,
"grad_norm": 0.7223851084709167,
"learning_rate": 3.0410456877595894e-05,
"loss": 0.7113,
"step": 40100
},
{
"epoch": 0.39282747837983095,
"grad_norm": 1.0449798107147217,
"learning_rate": 3.0361592963596386e-05,
"loss": 0.6985,
"step": 40200
},
{
"epoch": 0.39380466116187035,
"grad_norm": 0.7078452110290527,
"learning_rate": 3.031272904959687e-05,
"loss": 0.714,
"step": 40300
},
{
"epoch": 0.3947818439439097,
"grad_norm": 0.5977550148963928,
"learning_rate": 3.0263865135597362e-05,
"loss": 0.7126,
"step": 40400
},
{
"epoch": 0.3957590267259491,
"grad_norm": 0.6963929533958435,
"learning_rate": 3.021500122159785e-05,
"loss": 0.6922,
"step": 40500
},
{
"epoch": 0.3967362095079885,
"grad_norm": 0.49735382199287415,
"learning_rate": 3.016613730759834e-05,
"loss": 0.6914,
"step": 40600
},
{
"epoch": 0.39771339229002783,
"grad_norm": 0.8894415497779846,
"learning_rate": 3.0117273393598826e-05,
"loss": 0.6988,
"step": 40700
},
{
"epoch": 0.39869057507206723,
"grad_norm": 0.5845156311988831,
"learning_rate": 3.0068409479599317e-05,
"loss": 0.705,
"step": 40800
},
{
"epoch": 0.39966775785410663,
"grad_norm": 0.7496864199638367,
"learning_rate": 3.0019545565599806e-05,
"loss": 0.669,
"step": 40900
},
{
"epoch": 0.400644940636146,
"grad_norm": 1.2446004152297974,
"learning_rate": 2.9970681651600297e-05,
"loss": 0.7063,
"step": 41000
},
{
"epoch": 0.4016221234181854,
"grad_norm": 0.37521255016326904,
"learning_rate": 2.992181773760078e-05,
"loss": 0.6966,
"step": 41100
},
{
"epoch": 0.40259930620022477,
"grad_norm": 0.7953245639801025,
"learning_rate": 2.9872953823601273e-05,
"loss": 0.6934,
"step": 41200
},
{
"epoch": 0.4035764889822641,
"grad_norm": 0.844543993473053,
"learning_rate": 2.982408990960176e-05,
"loss": 0.6926,
"step": 41300
},
{
"epoch": 0.4045536717643035,
"grad_norm": 0.5298857688903809,
"learning_rate": 2.9775225995602253e-05,
"loss": 0.6926,
"step": 41400
},
{
"epoch": 0.4055308545463429,
"grad_norm": 0.6932188272476196,
"learning_rate": 2.9726362081602737e-05,
"loss": 0.6868,
"step": 41500
},
{
"epoch": 0.40650803732838225,
"grad_norm": 0.7204051613807678,
"learning_rate": 2.9677498167603225e-05,
"loss": 0.7064,
"step": 41600
},
{
"epoch": 0.40748522011042165,
"grad_norm": 1.0420963764190674,
"learning_rate": 2.9628634253603717e-05,
"loss": 0.7072,
"step": 41700
},
{
"epoch": 0.40846240289246105,
"grad_norm": 0.4677026867866516,
"learning_rate": 2.95797703396042e-05,
"loss": 0.691,
"step": 41800
},
{
"epoch": 0.4094395856745004,
"grad_norm": 0.6934903860092163,
"learning_rate": 2.9530906425604693e-05,
"loss": 0.6962,
"step": 41900
},
{
"epoch": 0.4104167684565398,
"grad_norm": 0.7500805854797363,
"learning_rate": 2.9482042511605178e-05,
"loss": 0.708,
"step": 42000
},
{
"epoch": 0.4113939512385792,
"grad_norm": 0.8887515664100647,
"learning_rate": 2.943317859760567e-05,
"loss": 0.702,
"step": 42100
},
{
"epoch": 0.41237113402061853,
"grad_norm": 0.39899566769599915,
"learning_rate": 2.9384314683606157e-05,
"loss": 0.709,
"step": 42200
},
{
"epoch": 0.41334831680265793,
"grad_norm": 0.8467943668365479,
"learning_rate": 2.933545076960665e-05,
"loss": 0.6928,
"step": 42300
},
{
"epoch": 0.41432549958469733,
"grad_norm": 0.6024282574653625,
"learning_rate": 2.9286586855607133e-05,
"loss": 0.6928,
"step": 42400
},
{
"epoch": 0.4153026823667367,
"grad_norm": 0.7921658158302307,
"learning_rate": 2.9237722941607625e-05,
"loss": 0.6865,
"step": 42500
},
{
"epoch": 0.4162798651487761,
"grad_norm": 0.9025784730911255,
"learning_rate": 2.9188859027608113e-05,
"loss": 0.6863,
"step": 42600
},
{
"epoch": 0.4172570479308155,
"grad_norm": 0.9453756809234619,
"learning_rate": 2.9139995113608604e-05,
"loss": 0.6924,
"step": 42700
},
{
"epoch": 0.4182342307128548,
"grad_norm": 0.8638947010040283,
"learning_rate": 2.909113119960909e-05,
"loss": 0.7011,
"step": 42800
},
{
"epoch": 0.4192114134948942,
"grad_norm": 0.6639747619628906,
"learning_rate": 2.904226728560958e-05,
"loss": 0.6766,
"step": 42900
},
{
"epoch": 0.4201885962769336,
"grad_norm": 0.7019941210746765,
"learning_rate": 2.899340337161007e-05,
"loss": 0.7025,
"step": 43000
},
{
"epoch": 0.42116577905897296,
"grad_norm": 0.6988587379455566,
"learning_rate": 2.8944539457610553e-05,
"loss": 0.6768,
"step": 43100
},
{
"epoch": 0.42214296184101235,
"grad_norm": 0.5817476511001587,
"learning_rate": 2.8895675543611045e-05,
"loss": 0.702,
"step": 43200
},
{
"epoch": 0.42312014462305175,
"grad_norm": 0.4533466398715973,
"learning_rate": 2.8846811629611533e-05,
"loss": 0.6949,
"step": 43300
},
{
"epoch": 0.4240973274050911,
"grad_norm": 0.6197069883346558,
"learning_rate": 2.8797947715612024e-05,
"loss": 0.684,
"step": 43400
},
{
"epoch": 0.4250745101871305,
"grad_norm": 1.693144679069519,
"learning_rate": 2.874908380161251e-05,
"loss": 0.7201,
"step": 43500
},
{
"epoch": 0.4260516929691699,
"grad_norm": 1.1772024631500244,
"learning_rate": 2.8700219887613e-05,
"loss": 0.6936,
"step": 43600
},
{
"epoch": 0.4270288757512093,
"grad_norm": 0.5265709161758423,
"learning_rate": 2.8651355973613485e-05,
"loss": 0.6994,
"step": 43700
},
{
"epoch": 0.42800605853324863,
"grad_norm": 0.8301248550415039,
"learning_rate": 2.8602492059613976e-05,
"loss": 0.6968,
"step": 43800
},
{
"epoch": 0.42898324131528803,
"grad_norm": 1.2123380899429321,
"learning_rate": 2.8553628145614464e-05,
"loss": 0.7013,
"step": 43900
},
{
"epoch": 0.42996042409732743,
"grad_norm": 1.3780418634414673,
"learning_rate": 2.8504764231614956e-05,
"loss": 0.6826,
"step": 44000
},
{
"epoch": 0.4309376068793668,
"grad_norm": 0.6333886981010437,
"learning_rate": 2.845590031761544e-05,
"loss": 0.6842,
"step": 44100
},
{
"epoch": 0.4319147896614062,
"grad_norm": 0.5353469252586365,
"learning_rate": 2.8407036403615932e-05,
"loss": 0.6751,
"step": 44200
},
{
"epoch": 0.43289197244344557,
"grad_norm": 0.9482343792915344,
"learning_rate": 2.835817248961642e-05,
"loss": 0.6961,
"step": 44300
},
{
"epoch": 0.4338691552254849,
"grad_norm": 0.7306164503097534,
"learning_rate": 2.830930857561691e-05,
"loss": 0.6829,
"step": 44400
},
{
"epoch": 0.4348463380075243,
"grad_norm": 0.9290406107902527,
"learning_rate": 2.8260444661617396e-05,
"loss": 0.7109,
"step": 44500
},
{
"epoch": 0.4358235207895637,
"grad_norm": 0.5903436541557312,
"learning_rate": 2.8211580747617884e-05,
"loss": 0.7144,
"step": 44600
},
{
"epoch": 0.43680070357160306,
"grad_norm": 0.7370823621749878,
"learning_rate": 2.8162716833618376e-05,
"loss": 0.6858,
"step": 44700
},
{
"epoch": 0.43777788635364245,
"grad_norm": 0.5477197766304016,
"learning_rate": 2.811385291961886e-05,
"loss": 0.6951,
"step": 44800
},
{
"epoch": 0.43875506913568185,
"grad_norm": 0.8994666934013367,
"learning_rate": 2.8064989005619352e-05,
"loss": 0.705,
"step": 44900
},
{
"epoch": 0.4397322519177212,
"grad_norm": 1.171186089515686,
"learning_rate": 2.8016125091619836e-05,
"loss": 0.6812,
"step": 45000
},
{
"epoch": 0.4407094346997606,
"grad_norm": 0.6986414194107056,
"learning_rate": 2.796726117762033e-05,
"loss": 0.6729,
"step": 45100
},
{
"epoch": 0.4416866174818,
"grad_norm": 0.8245409727096558,
"learning_rate": 2.7918397263620816e-05,
"loss": 0.6679,
"step": 45200
},
{
"epoch": 0.44266380026383934,
"grad_norm": 0.8805913925170898,
"learning_rate": 2.7869533349621307e-05,
"loss": 0.7042,
"step": 45300
},
{
"epoch": 0.44364098304587873,
"grad_norm": 0.7037094831466675,
"learning_rate": 2.7820669435621792e-05,
"loss": 0.6988,
"step": 45400
},
{
"epoch": 0.44461816582791813,
"grad_norm": 1.118363380432129,
"learning_rate": 2.7771805521622284e-05,
"loss": 0.6866,
"step": 45500
},
{
"epoch": 0.4455953486099575,
"grad_norm": 1.0665768384933472,
"learning_rate": 2.772294160762277e-05,
"loss": 0.6732,
"step": 45600
},
{
"epoch": 0.4465725313919969,
"grad_norm": 0.7593882083892822,
"learning_rate": 2.7674077693623263e-05,
"loss": 0.6951,
"step": 45700
},
{
"epoch": 0.4475497141740363,
"grad_norm": 2.3182179927825928,
"learning_rate": 2.7625213779623748e-05,
"loss": 0.6695,
"step": 45800
},
{
"epoch": 0.4485268969560756,
"grad_norm": 1.2548315525054932,
"learning_rate": 2.757634986562424e-05,
"loss": 0.7128,
"step": 45900
},
{
"epoch": 0.449504079738115,
"grad_norm": 0.8613176941871643,
"learning_rate": 2.7527485951624727e-05,
"loss": 0.6956,
"step": 46000
},
{
"epoch": 0.4504812625201544,
"grad_norm": 0.946165919303894,
"learning_rate": 2.7478622037625212e-05,
"loss": 0.7177,
"step": 46100
},
{
"epoch": 0.45145844530219376,
"grad_norm": 0.9122072458267212,
"learning_rate": 2.7429758123625703e-05,
"loss": 0.7094,
"step": 46200
},
{
"epoch": 0.45243562808423315,
"grad_norm": 0.8797391057014465,
"learning_rate": 2.738089420962619e-05,
"loss": 0.7118,
"step": 46300
},
{
"epoch": 0.45341281086627255,
"grad_norm": 0.5321417450904846,
"learning_rate": 2.7332030295626683e-05,
"loss": 0.6923,
"step": 46400
},
{
"epoch": 0.4543899936483119,
"grad_norm": 1.0878016948699951,
"learning_rate": 2.7283166381627168e-05,
"loss": 0.72,
"step": 46500
},
{
"epoch": 0.4553671764303513,
"grad_norm": 0.8534865975379944,
"learning_rate": 2.723430246762766e-05,
"loss": 0.6945,
"step": 46600
},
{
"epoch": 0.4563443592123907,
"grad_norm": 0.8475703597068787,
"learning_rate": 2.7185438553628144e-05,
"loss": 0.6891,
"step": 46700
},
{
"epoch": 0.45732154199443004,
"grad_norm": 0.7100959420204163,
"learning_rate": 2.713657463962864e-05,
"loss": 0.6605,
"step": 46800
},
{
"epoch": 0.45829872477646943,
"grad_norm": 0.6616931557655334,
"learning_rate": 2.7087710725629123e-05,
"loss": 0.6678,
"step": 46900
},
{
"epoch": 0.45927590755850883,
"grad_norm": 1.2114359140396118,
"learning_rate": 2.7038846811629615e-05,
"loss": 0.6525,
"step": 47000
},
{
"epoch": 0.4602530903405482,
"grad_norm": 0.4216634929180145,
"learning_rate": 2.69899828976301e-05,
"loss": 0.6881,
"step": 47100
},
{
"epoch": 0.4612302731225876,
"grad_norm": 0.7598534822463989,
"learning_rate": 2.694111898363059e-05,
"loss": 0.6555,
"step": 47200
},
{
"epoch": 0.462207455904627,
"grad_norm": 0.9792212843894958,
"learning_rate": 2.689225506963108e-05,
"loss": 0.6866,
"step": 47300
},
{
"epoch": 0.4631846386866663,
"grad_norm": 0.5867584943771362,
"learning_rate": 2.684339115563157e-05,
"loss": 0.6541,
"step": 47400
},
{
"epoch": 0.4641618214687057,
"grad_norm": 0.8288137912750244,
"learning_rate": 2.6794527241632055e-05,
"loss": 0.7057,
"step": 47500
},
{
"epoch": 0.4651390042507451,
"grad_norm": 1.5305638313293457,
"learning_rate": 2.6745663327632543e-05,
"loss": 0.6752,
"step": 47600
},
{
"epoch": 0.46611618703278446,
"grad_norm": 1.0784820318222046,
"learning_rate": 2.6696799413633035e-05,
"loss": 0.7041,
"step": 47700
},
{
"epoch": 0.46709336981482386,
"grad_norm": 0.7708161473274231,
"learning_rate": 2.664793549963352e-05,
"loss": 0.6766,
"step": 47800
},
{
"epoch": 0.46807055259686325,
"grad_norm": 0.7639223337173462,
"learning_rate": 2.659907158563401e-05,
"loss": 0.6553,
"step": 47900
},
{
"epoch": 0.4690477353789026,
"grad_norm": 0.4256194233894348,
"learning_rate": 2.65502076716345e-05,
"loss": 0.6921,
"step": 48000
},
{
"epoch": 0.470024918160942,
"grad_norm": 1.2620900869369507,
"learning_rate": 2.650134375763499e-05,
"loss": 0.6945,
"step": 48100
},
{
"epoch": 0.4710021009429814,
"grad_norm": 0.7683165073394775,
"learning_rate": 2.6452479843635475e-05,
"loss": 0.6594,
"step": 48200
},
{
"epoch": 0.4719792837250208,
"grad_norm": 0.784582257270813,
"learning_rate": 2.6403615929635966e-05,
"loss": 0.6877,
"step": 48300
},
{
"epoch": 0.47295646650706014,
"grad_norm": 0.7894740104675293,
"learning_rate": 2.635475201563645e-05,
"loss": 0.6944,
"step": 48400
},
{
"epoch": 0.47393364928909953,
"grad_norm": 0.6949831247329712,
"learning_rate": 2.6305888101636942e-05,
"loss": 0.6625,
"step": 48500
},
{
"epoch": 0.47491083207113893,
"grad_norm": 0.5648496747016907,
"learning_rate": 2.625702418763743e-05,
"loss": 0.6489,
"step": 48600
},
{
"epoch": 0.4758880148531783,
"grad_norm": 0.8879817128181458,
"learning_rate": 2.6208160273637922e-05,
"loss": 0.6465,
"step": 48700
},
{
"epoch": 0.4768651976352177,
"grad_norm": 0.5845817923545837,
"learning_rate": 2.6159296359638407e-05,
"loss": 0.7044,
"step": 48800
},
{
"epoch": 0.4778423804172571,
"grad_norm": 0.8040775060653687,
"learning_rate": 2.6110432445638898e-05,
"loss": 0.6745,
"step": 48900
},
{
"epoch": 0.4788195631992964,
"grad_norm": 0.5439351201057434,
"learning_rate": 2.6061568531639386e-05,
"loss": 0.6924,
"step": 49000
},
{
"epoch": 0.4797967459813358,
"grad_norm": 1.1411272287368774,
"learning_rate": 2.601270461763987e-05,
"loss": 0.6834,
"step": 49100
},
{
"epoch": 0.4807739287633752,
"grad_norm": 0.7273046374320984,
"learning_rate": 2.5963840703640362e-05,
"loss": 0.6547,
"step": 49200
},
{
"epoch": 0.48175111154541456,
"grad_norm": 0.9065064787864685,
"learning_rate": 2.591497678964085e-05,
"loss": 0.6792,
"step": 49300
},
{
"epoch": 0.48272829432745396,
"grad_norm": 0.6722708344459534,
"learning_rate": 2.5866112875641342e-05,
"loss": 0.6913,
"step": 49400
},
{
"epoch": 0.48370547710949335,
"grad_norm": 0.6576828360557556,
"learning_rate": 2.5817248961641826e-05,
"loss": 0.6741,
"step": 49500
},
{
"epoch": 0.4846826598915327,
"grad_norm": 0.46869999170303345,
"learning_rate": 2.5768385047642318e-05,
"loss": 0.6729,
"step": 49600
},
{
"epoch": 0.4856598426735721,
"grad_norm": 0.735565185546875,
"learning_rate": 2.5719521133642806e-05,
"loss": 0.6781,
"step": 49700
},
{
"epoch": 0.4866370254556115,
"grad_norm": 0.6392993927001953,
"learning_rate": 2.5670657219643297e-05,
"loss": 0.6824,
"step": 49800
},
{
"epoch": 0.48761420823765084,
"grad_norm": 3.2004761695861816,
"learning_rate": 2.5621793305643782e-05,
"loss": 0.6862,
"step": 49900
},
{
"epoch": 0.48859139101969024,
"grad_norm": 0.6201328635215759,
"learning_rate": 2.5572929391644274e-05,
"loss": 0.664,
"step": 50000
},
{
"epoch": 0.48956857380172963,
"grad_norm": 1.179991364479065,
"learning_rate": 2.5524065477644758e-05,
"loss": 0.6841,
"step": 50100
},
{
"epoch": 0.490545756583769,
"grad_norm": 0.942451000213623,
"learning_rate": 2.547520156364525e-05,
"loss": 0.6555,
"step": 50200
},
{
"epoch": 0.4915229393658084,
"grad_norm": 1.1190769672393799,
"learning_rate": 2.5426337649645738e-05,
"loss": 0.673,
"step": 50300
},
{
"epoch": 0.4925001221478478,
"grad_norm": 0.712053656578064,
"learning_rate": 2.537747373564623e-05,
"loss": 0.6849,
"step": 50400
},
{
"epoch": 0.4934773049298871,
"grad_norm": 1.3936710357666016,
"learning_rate": 2.5328609821646714e-05,
"loss": 0.6751,
"step": 50500
},
{
"epoch": 0.4944544877119265,
"grad_norm": 0.5909391045570374,
"learning_rate": 2.5279745907647205e-05,
"loss": 0.683,
"step": 50600
},
{
"epoch": 0.4954316704939659,
"grad_norm": 0.8883010149002075,
"learning_rate": 2.5230881993647693e-05,
"loss": 0.6806,
"step": 50700
},
{
"epoch": 0.49640885327600526,
"grad_norm": 0.7069185376167297,
"learning_rate": 2.5182018079648178e-05,
"loss": 0.6779,
"step": 50800
},
{
"epoch": 0.49738603605804466,
"grad_norm": 0.7906535267829895,
"learning_rate": 2.513315416564867e-05,
"loss": 0.663,
"step": 50900
},
{
"epoch": 0.49836321884008405,
"grad_norm": 1.8775051832199097,
"learning_rate": 2.5084290251649158e-05,
"loss": 0.6924,
"step": 51000
},
{
"epoch": 0.4993404016221234,
"grad_norm": 0.4028649628162384,
"learning_rate": 2.503542633764965e-05,
"loss": 0.6611,
"step": 51100
},
{
"epoch": 0.5003175844041629,
"grad_norm": 0.8514829277992249,
"learning_rate": 2.4986562423650137e-05,
"loss": 0.6423,
"step": 51200
},
{
"epoch": 0.5012947671862021,
"grad_norm": 0.5659759044647217,
"learning_rate": 2.4937698509650625e-05,
"loss": 0.6978,
"step": 51300
},
{
"epoch": 0.5022719499682415,
"grad_norm": 0.8396779298782349,
"learning_rate": 2.488883459565111e-05,
"loss": 0.6593,
"step": 51400
},
{
"epoch": 0.5032491327502809,
"grad_norm": 0.6824951767921448,
"learning_rate": 2.48399706816516e-05,
"loss": 0.6839,
"step": 51500
},
{
"epoch": 0.5042263155323203,
"grad_norm": 0.6299941539764404,
"learning_rate": 2.479110676765209e-05,
"loss": 0.6743,
"step": 51600
},
{
"epoch": 0.5052034983143597,
"grad_norm": 1.2409921884536743,
"learning_rate": 2.4742242853652577e-05,
"loss": 0.6477,
"step": 51700
},
{
"epoch": 0.5061806810963991,
"grad_norm": 0.668393075466156,
"learning_rate": 2.4693378939653065e-05,
"loss": 0.6568,
"step": 51800
},
{
"epoch": 0.5071578638784384,
"grad_norm": 0.5376803278923035,
"learning_rate": 2.4644515025653557e-05,
"loss": 0.6476,
"step": 51900
},
{
"epoch": 0.5081350466604778,
"grad_norm": 1.710288166999817,
"learning_rate": 2.4595651111654045e-05,
"loss": 0.6404,
"step": 52000
},
{
"epoch": 0.5091122294425172,
"grad_norm": 0.6142415404319763,
"learning_rate": 2.4546787197654533e-05,
"loss": 0.7026,
"step": 52100
},
{
"epoch": 0.5100894122245566,
"grad_norm": 0.4976397454738617,
"learning_rate": 2.449792328365502e-05,
"loss": 0.6659,
"step": 52200
},
{
"epoch": 0.511066595006596,
"grad_norm": 0.8558853268623352,
"learning_rate": 2.4449059369655513e-05,
"loss": 0.67,
"step": 52300
},
{
"epoch": 0.5120437777886354,
"grad_norm": 0.620583713054657,
"learning_rate": 2.4400195455656e-05,
"loss": 0.6596,
"step": 52400
},
{
"epoch": 0.5130209605706747,
"grad_norm": 0.8520305752754211,
"learning_rate": 2.435133154165649e-05,
"loss": 0.653,
"step": 52500
},
{
"epoch": 0.5139981433527141,
"grad_norm": 0.43671169877052307,
"learning_rate": 2.4302467627656977e-05,
"loss": 0.6554,
"step": 52600
},
{
"epoch": 0.5149753261347535,
"grad_norm": 0.5502797961235046,
"learning_rate": 2.4253603713657465e-05,
"loss": 0.6432,
"step": 52700
},
{
"epoch": 0.5159525089167929,
"grad_norm": 0.918704628944397,
"learning_rate": 2.4204739799657956e-05,
"loss": 0.6604,
"step": 52800
},
{
"epoch": 0.5169296916988323,
"grad_norm": 0.44583848118782043,
"learning_rate": 2.415587588565844e-05,
"loss": 0.6736,
"step": 52900
},
{
"epoch": 0.5179068744808717,
"grad_norm": 0.8312250971794128,
"learning_rate": 2.410701197165893e-05,
"loss": 0.6645,
"step": 53000
},
{
"epoch": 0.518884057262911,
"grad_norm": 0.39499637484550476,
"learning_rate": 2.4058148057659417e-05,
"loss": 0.6876,
"step": 53100
},
{
"epoch": 0.5198612400449504,
"grad_norm": 0.5650041699409485,
"learning_rate": 2.400928414365991e-05,
"loss": 0.691,
"step": 53200
},
{
"epoch": 0.5208384228269898,
"grad_norm": 0.7247036099433899,
"learning_rate": 2.3960420229660397e-05,
"loss": 0.6367,
"step": 53300
},
{
"epoch": 0.5218156056090292,
"grad_norm": 0.8500406742095947,
"learning_rate": 2.3911556315660885e-05,
"loss": 0.6678,
"step": 53400
},
{
"epoch": 0.5227927883910686,
"grad_norm": 1.2467963695526123,
"learning_rate": 2.3862692401661373e-05,
"loss": 0.6439,
"step": 53500
},
{
"epoch": 0.523769971173108,
"grad_norm": 1.0069133043289185,
"learning_rate": 2.3813828487661864e-05,
"loss": 0.6555,
"step": 53600
},
{
"epoch": 0.5247471539551473,
"grad_norm": 0.9213836193084717,
"learning_rate": 2.3764964573662352e-05,
"loss": 0.6374,
"step": 53700
},
{
"epoch": 0.5257243367371867,
"grad_norm": 0.7063928246498108,
"learning_rate": 2.371610065966284e-05,
"loss": 0.6473,
"step": 53800
},
{
"epoch": 0.5267015195192261,
"grad_norm": 0.7876357436180115,
"learning_rate": 2.366723674566333e-05,
"loss": 0.6599,
"step": 53900
},
{
"epoch": 0.5276787023012655,
"grad_norm": 0.5371726751327515,
"learning_rate": 2.3618372831663816e-05,
"loss": 0.6569,
"step": 54000
},
{
"epoch": 0.5286558850833049,
"grad_norm": 0.6501371264457703,
"learning_rate": 2.3569508917664308e-05,
"loss": 0.6502,
"step": 54100
},
{
"epoch": 0.5296330678653443,
"grad_norm": 1.9818251132965088,
"learning_rate": 2.3520645003664796e-05,
"loss": 0.6628,
"step": 54200
},
{
"epoch": 0.5306102506473835,
"grad_norm": 0.6198662519454956,
"learning_rate": 2.3471781089665284e-05,
"loss": 0.6771,
"step": 54300
},
{
"epoch": 0.5315874334294229,
"grad_norm": 0.70624840259552,
"learning_rate": 2.3422917175665772e-05,
"loss": 0.6685,
"step": 54400
},
{
"epoch": 0.5325646162114623,
"grad_norm": 0.5182805061340332,
"learning_rate": 2.337405326166626e-05,
"loss": 0.6651,
"step": 54500
},
{
"epoch": 0.5335417989935017,
"grad_norm": 1.0862709283828735,
"learning_rate": 2.3325189347666748e-05,
"loss": 0.668,
"step": 54600
},
{
"epoch": 0.5345189817755411,
"grad_norm": 0.5830691456794739,
"learning_rate": 2.3276325433667236e-05,
"loss": 0.67,
"step": 54700
},
{
"epoch": 0.5354961645575805,
"grad_norm": 0.5614120960235596,
"learning_rate": 2.3227461519667724e-05,
"loss": 0.6466,
"step": 54800
},
{
"epoch": 0.5364733473396198,
"grad_norm": 0.6346180438995361,
"learning_rate": 2.3178597605668216e-05,
"loss": 0.6784,
"step": 54900
},
{
"epoch": 0.5374505301216592,
"grad_norm": 0.5453216433525085,
"learning_rate": 2.3129733691668704e-05,
"loss": 0.6507,
"step": 55000
},
{
"epoch": 0.5384277129036986,
"grad_norm": 0.8145617246627808,
"learning_rate": 2.3080869777669192e-05,
"loss": 0.6874,
"step": 55100
},
{
"epoch": 0.539404895685738,
"grad_norm": 0.8334397673606873,
"learning_rate": 2.303200586366968e-05,
"loss": 0.6772,
"step": 55200
},
{
"epoch": 0.5403820784677774,
"grad_norm": 0.5468283295631409,
"learning_rate": 2.298314194967017e-05,
"loss": 0.6448,
"step": 55300
},
{
"epoch": 0.5413592612498168,
"grad_norm": 0.8369360566139221,
"learning_rate": 2.293427803567066e-05,
"loss": 0.6593,
"step": 55400
},
{
"epoch": 0.5423364440318562,
"grad_norm": 0.498793363571167,
"learning_rate": 2.2885414121671148e-05,
"loss": 0.6236,
"step": 55500
},
{
"epoch": 0.5433136268138955,
"grad_norm": 0.6096756458282471,
"learning_rate": 2.2836550207671636e-05,
"loss": 0.6766,
"step": 55600
},
{
"epoch": 0.5442908095959349,
"grad_norm": 0.8249727487564087,
"learning_rate": 2.2787686293672124e-05,
"loss": 0.654,
"step": 55700
},
{
"epoch": 0.5452679923779743,
"grad_norm": 0.9821385145187378,
"learning_rate": 2.2738822379672615e-05,
"loss": 0.6633,
"step": 55800
},
{
"epoch": 0.5462451751600137,
"grad_norm": 1.025420069694519,
"learning_rate": 2.26899584656731e-05,
"loss": 0.6691,
"step": 55900
},
{
"epoch": 0.5472223579420531,
"grad_norm": 1.1872769594192505,
"learning_rate": 2.2641094551673588e-05,
"loss": 0.6811,
"step": 56000
},
{
"epoch": 0.5481995407240925,
"grad_norm": 0.6862273812294006,
"learning_rate": 2.259223063767408e-05,
"loss": 0.6503,
"step": 56100
},
{
"epoch": 0.5491767235061318,
"grad_norm": 1.9515796899795532,
"learning_rate": 2.2543366723674567e-05,
"loss": 0.6672,
"step": 56200
},
{
"epoch": 0.5501539062881712,
"grad_norm": 1.5116077661514282,
"learning_rate": 2.2494502809675055e-05,
"loss": 0.6714,
"step": 56300
},
{
"epoch": 0.5511310890702106,
"grad_norm": 0.710858166217804,
"learning_rate": 2.2445638895675544e-05,
"loss": 0.6577,
"step": 56400
},
{
"epoch": 0.55210827185225,
"grad_norm": 0.6870605945587158,
"learning_rate": 2.239677498167603e-05,
"loss": 0.6655,
"step": 56500
},
{
"epoch": 0.5530854546342894,
"grad_norm": 0.802883505821228,
"learning_rate": 2.2347911067676523e-05,
"loss": 0.6812,
"step": 56600
},
{
"epoch": 0.5540626374163288,
"grad_norm": 1.244555115699768,
"learning_rate": 2.229904715367701e-05,
"loss": 0.655,
"step": 56700
},
{
"epoch": 0.5550398201983681,
"grad_norm": 0.7662067413330078,
"learning_rate": 2.22501832396775e-05,
"loss": 0.6867,
"step": 56800
},
{
"epoch": 0.5560170029804075,
"grad_norm": 0.9172037839889526,
"learning_rate": 2.2201319325677987e-05,
"loss": 0.6427,
"step": 56900
},
{
"epoch": 0.5569941857624469,
"grad_norm": 0.8700697422027588,
"learning_rate": 2.215245541167848e-05,
"loss": 0.6959,
"step": 57000
},
{
"epoch": 0.5579713685444863,
"grad_norm": 1.1184202432632446,
"learning_rate": 2.2103591497678967e-05,
"loss": 0.6601,
"step": 57100
},
{
"epoch": 0.5589485513265257,
"grad_norm": 1.1001787185668945,
"learning_rate": 2.2054727583679455e-05,
"loss": 0.6753,
"step": 57200
},
{
"epoch": 0.559925734108565,
"grad_norm": 0.29295894503593445,
"learning_rate": 2.2005863669679943e-05,
"loss": 0.625,
"step": 57300
},
{
"epoch": 0.5609029168906043,
"grad_norm": 0.5778409242630005,
"learning_rate": 2.195699975568043e-05,
"loss": 0.6554,
"step": 57400
},
{
"epoch": 0.5618800996726437,
"grad_norm": 0.8341584801673889,
"learning_rate": 2.190813584168092e-05,
"loss": 0.6324,
"step": 57500
},
{
"epoch": 0.5628572824546831,
"grad_norm": 1.329548716545105,
"learning_rate": 2.1859271927681407e-05,
"loss": 0.6657,
"step": 57600
},
{
"epoch": 0.5638344652367225,
"grad_norm": 0.6559785604476929,
"learning_rate": 2.1810408013681895e-05,
"loss": 0.6411,
"step": 57700
},
{
"epoch": 0.5648116480187619,
"grad_norm": 1.1021350622177124,
"learning_rate": 2.1761544099682387e-05,
"loss": 0.6363,
"step": 57800
},
{
"epoch": 0.5657888308008013,
"grad_norm": 1.0015547275543213,
"learning_rate": 2.1712680185682875e-05,
"loss": 0.632,
"step": 57900
},
{
"epoch": 0.5667660135828406,
"grad_norm": 0.7394452691078186,
"learning_rate": 2.1663816271683363e-05,
"loss": 0.6882,
"step": 58000
},
{
"epoch": 0.56774319636488,
"grad_norm": 1.0177232027053833,
"learning_rate": 2.161495235768385e-05,
"loss": 0.659,
"step": 58100
},
{
"epoch": 0.5687203791469194,
"grad_norm": 1.182385802268982,
"learning_rate": 2.156608844368434e-05,
"loss": 0.6304,
"step": 58200
},
{
"epoch": 0.5696975619289588,
"grad_norm": 0.6992839574813843,
"learning_rate": 2.151722452968483e-05,
"loss": 0.6419,
"step": 58300
},
{
"epoch": 0.5706747447109982,
"grad_norm": 1.127772331237793,
"learning_rate": 2.146836061568532e-05,
"loss": 0.6762,
"step": 58400
},
{
"epoch": 0.5716519274930376,
"grad_norm": 1.0480372905731201,
"learning_rate": 2.1419496701685806e-05,
"loss": 0.649,
"step": 58500
},
{
"epoch": 0.5726291102750769,
"grad_norm": 0.62301105260849,
"learning_rate": 2.1370632787686295e-05,
"loss": 0.6423,
"step": 58600
},
{
"epoch": 0.5736062930571163,
"grad_norm": 0.7996447086334229,
"learning_rate": 2.1321768873686786e-05,
"loss": 0.6675,
"step": 58700
},
{
"epoch": 0.5745834758391557,
"grad_norm": 0.8735845685005188,
"learning_rate": 2.1272904959687274e-05,
"loss": 0.6251,
"step": 58800
},
{
"epoch": 0.5755606586211951,
"grad_norm": 1.0168455839157104,
"learning_rate": 2.1224041045687762e-05,
"loss": 0.6623,
"step": 58900
},
{
"epoch": 0.5765378414032345,
"grad_norm": 0.7308356165885925,
"learning_rate": 2.1175177131688247e-05,
"loss": 0.6613,
"step": 59000
},
{
"epoch": 0.5775150241852739,
"grad_norm": 1.2486464977264404,
"learning_rate": 2.1126313217688738e-05,
"loss": 0.6424,
"step": 59100
},
{
"epoch": 0.5784922069673132,
"grad_norm": 0.8921827077865601,
"learning_rate": 2.1077449303689226e-05,
"loss": 0.6403,
"step": 59200
},
{
"epoch": 0.5794693897493526,
"grad_norm": 0.5246706604957581,
"learning_rate": 2.1028585389689714e-05,
"loss": 0.6494,
"step": 59300
},
{
"epoch": 0.580446572531392,
"grad_norm": 0.8651568293571472,
"learning_rate": 2.0979721475690202e-05,
"loss": 0.6352,
"step": 59400
},
{
"epoch": 0.5814237553134314,
"grad_norm": 0.9502151608467102,
"learning_rate": 2.093085756169069e-05,
"loss": 0.6661,
"step": 59500
},
{
"epoch": 0.5824009380954708,
"grad_norm": 0.6827490925788879,
"learning_rate": 2.0881993647691182e-05,
"loss": 0.625,
"step": 59600
},
{
"epoch": 0.5833781208775102,
"grad_norm": 0.8105266690254211,
"learning_rate": 2.083312973369167e-05,
"loss": 0.6699,
"step": 59700
},
{
"epoch": 0.5843553036595496,
"grad_norm": 1.005845308303833,
"learning_rate": 2.0784265819692158e-05,
"loss": 0.6528,
"step": 59800
},
{
"epoch": 0.5853324864415889,
"grad_norm": 0.8736119270324707,
"learning_rate": 2.0735401905692646e-05,
"loss": 0.6691,
"step": 59900
},
{
"epoch": 0.5863096692236283,
"grad_norm": 0.8782946467399597,
"learning_rate": 2.0686537991693138e-05,
"loss": 0.6677,
"step": 60000
},
{
"epoch": 0.5872868520056677,
"grad_norm": 0.7457369565963745,
"learning_rate": 2.0637674077693626e-05,
"loss": 0.6323,
"step": 60100
},
{
"epoch": 0.5882640347877071,
"grad_norm": 1.0230743885040283,
"learning_rate": 2.0588810163694114e-05,
"loss": 0.6521,
"step": 60200
},
{
"epoch": 0.5892412175697465,
"grad_norm": 0.8328123688697815,
"learning_rate": 2.0539946249694602e-05,
"loss": 0.6356,
"step": 60300
},
{
"epoch": 0.5902184003517859,
"grad_norm": 0.7374850511550903,
"learning_rate": 2.049108233569509e-05,
"loss": 0.6669,
"step": 60400
},
{
"epoch": 0.5911955831338251,
"grad_norm": 0.505228579044342,
"learning_rate": 2.0442218421695578e-05,
"loss": 0.6734,
"step": 60500
},
{
"epoch": 0.5921727659158645,
"grad_norm": 0.8307722210884094,
"learning_rate": 2.0393354507696066e-05,
"loss": 0.657,
"step": 60600
},
{
"epoch": 0.5931499486979039,
"grad_norm": 0.8867704272270203,
"learning_rate": 2.0344490593696554e-05,
"loss": 0.6407,
"step": 60700
},
{
"epoch": 0.5941271314799433,
"grad_norm": 0.716373085975647,
"learning_rate": 2.0295626679697045e-05,
"loss": 0.6428,
"step": 60800
},
{
"epoch": 0.5951043142619827,
"grad_norm": 0.5812042355537415,
"learning_rate": 2.0246762765697534e-05,
"loss": 0.63,
"step": 60900
},
{
"epoch": 0.5960814970440221,
"grad_norm": 1.0057129859924316,
"learning_rate": 2.019789885169802e-05,
"loss": 0.6161,
"step": 61000
},
{
"epoch": 0.5970586798260614,
"grad_norm": 0.6143211126327515,
"learning_rate": 2.014903493769851e-05,
"loss": 0.6454,
"step": 61100
},
{
"epoch": 0.5980358626081008,
"grad_norm": 1.038710594177246,
"learning_rate": 2.0100171023698998e-05,
"loss": 0.6701,
"step": 61200
},
{
"epoch": 0.5990130453901402,
"grad_norm": 0.6891298294067383,
"learning_rate": 2.005130710969949e-05,
"loss": 0.6666,
"step": 61300
},
{
"epoch": 0.5999902281721796,
"grad_norm": 0.7872188091278076,
"learning_rate": 2.0002443195699977e-05,
"loss": 0.6357,
"step": 61400
},
{
"epoch": 0.600967410954219,
"grad_norm": 1.2167768478393555,
"learning_rate": 1.9953579281700465e-05,
"loss": 0.6686,
"step": 61500
},
{
"epoch": 0.6019445937362584,
"grad_norm": 1.0418341159820557,
"learning_rate": 1.9904715367700953e-05,
"loss": 0.6356,
"step": 61600
},
{
"epoch": 0.6029217765182977,
"grad_norm": 0.6209270358085632,
"learning_rate": 1.9855851453701445e-05,
"loss": 0.657,
"step": 61700
},
{
"epoch": 0.6038989593003371,
"grad_norm": 0.8585149645805359,
"learning_rate": 1.9806987539701933e-05,
"loss": 0.6157,
"step": 61800
},
{
"epoch": 0.6048761420823765,
"grad_norm": 0.5286767482757568,
"learning_rate": 1.975812362570242e-05,
"loss": 0.6734,
"step": 61900
},
{
"epoch": 0.6058533248644159,
"grad_norm": 0.6499518156051636,
"learning_rate": 1.9709259711702906e-05,
"loss": 0.6545,
"step": 62000
},
{
"epoch": 0.6068305076464553,
"grad_norm": 1.4340311288833618,
"learning_rate": 1.9660395797703397e-05,
"loss": 0.6402,
"step": 62100
},
{
"epoch": 0.6078076904284947,
"grad_norm": 0.4783228039741516,
"learning_rate": 1.9611531883703885e-05,
"loss": 0.6495,
"step": 62200
},
{
"epoch": 0.608784873210534,
"grad_norm": 0.6510328054428101,
"learning_rate": 1.9562667969704373e-05,
"loss": 0.6398,
"step": 62300
},
{
"epoch": 0.6097620559925734,
"grad_norm": 0.7298358082771301,
"learning_rate": 1.951380405570486e-05,
"loss": 0.6406,
"step": 62400
},
{
"epoch": 0.6107392387746128,
"grad_norm": 0.7467713952064514,
"learning_rate": 1.9464940141705353e-05,
"loss": 0.6618,
"step": 62500
},
{
"epoch": 0.6117164215566522,
"grad_norm": 1.1706078052520752,
"learning_rate": 1.941607622770584e-05,
"loss": 0.6603,
"step": 62600
},
{
"epoch": 0.6126936043386916,
"grad_norm": 1.9863495826721191,
"learning_rate": 1.936721231370633e-05,
"loss": 0.628,
"step": 62700
},
{
"epoch": 0.613670787120731,
"grad_norm": 1.1297212839126587,
"learning_rate": 1.9318348399706817e-05,
"loss": 0.6198,
"step": 62800
},
{
"epoch": 0.6146479699027703,
"grad_norm": 0.6895560026168823,
"learning_rate": 1.9269484485707305e-05,
"loss": 0.654,
"step": 62900
},
{
"epoch": 0.6156251526848097,
"grad_norm": 0.5572859644889832,
"learning_rate": 1.9220620571707796e-05,
"loss": 0.6237,
"step": 63000
},
{
"epoch": 0.6166023354668491,
"grad_norm": 1.7625269889831543,
"learning_rate": 1.9171756657708284e-05,
"loss": 0.6615,
"step": 63100
},
{
"epoch": 0.6175795182488885,
"grad_norm": 0.9473828673362732,
"learning_rate": 1.9122892743708773e-05,
"loss": 0.624,
"step": 63200
},
{
"epoch": 0.6185567010309279,
"grad_norm": 1.6622077226638794,
"learning_rate": 1.907402882970926e-05,
"loss": 0.648,
"step": 63300
},
{
"epoch": 0.6195338838129673,
"grad_norm": 0.889667809009552,
"learning_rate": 1.9025164915709752e-05,
"loss": 0.6321,
"step": 63400
},
{
"epoch": 0.6205110665950065,
"grad_norm": 0.7613341212272644,
"learning_rate": 1.8976301001710237e-05,
"loss": 0.637,
"step": 63500
},
{
"epoch": 0.6214882493770459,
"grad_norm": 0.9912586212158203,
"learning_rate": 1.8927437087710725e-05,
"loss": 0.6422,
"step": 63600
},
{
"epoch": 0.6224654321590853,
"grad_norm": 0.7905563712120056,
"learning_rate": 1.8878573173711213e-05,
"loss": 0.6362,
"step": 63700
},
{
"epoch": 0.6234426149411247,
"grad_norm": 0.4368293881416321,
"learning_rate": 1.8829709259711704e-05,
"loss": 0.6472,
"step": 63800
},
{
"epoch": 0.6244197977231641,
"grad_norm": 0.8466482758522034,
"learning_rate": 1.8780845345712192e-05,
"loss": 0.673,
"step": 63900
},
{
"epoch": 0.6253969805052035,
"grad_norm": 1.4137593507766724,
"learning_rate": 1.873198143171268e-05,
"loss": 0.6382,
"step": 64000
},
{
"epoch": 0.6263741632872428,
"grad_norm": 1.7590171098709106,
"learning_rate": 1.868311751771317e-05,
"loss": 0.6421,
"step": 64100
},
{
"epoch": 0.6273513460692822,
"grad_norm": 0.7667103409767151,
"learning_rate": 1.863425360371366e-05,
"loss": 0.6448,
"step": 64200
},
{
"epoch": 0.6283285288513216,
"grad_norm": 1.0524508953094482,
"learning_rate": 1.8585389689714148e-05,
"loss": 0.6491,
"step": 64300
},
{
"epoch": 0.629305711633361,
"grad_norm": 0.6090672612190247,
"learning_rate": 1.8536525775714636e-05,
"loss": 0.6416,
"step": 64400
},
{
"epoch": 0.6302828944154004,
"grad_norm": 0.5970349311828613,
"learning_rate": 1.8487661861715124e-05,
"loss": 0.6393,
"step": 64500
},
{
"epoch": 0.6312600771974398,
"grad_norm": 0.9564999341964722,
"learning_rate": 1.8438797947715612e-05,
"loss": 0.6656,
"step": 64600
},
{
"epoch": 0.6322372599794792,
"grad_norm": 1.319643259048462,
"learning_rate": 1.8389934033716104e-05,
"loss": 0.6372,
"step": 64700
},
{
"epoch": 0.6332144427615185,
"grad_norm": 1.0311692953109741,
"learning_rate": 1.8341070119716592e-05,
"loss": 0.6377,
"step": 64800
},
{
"epoch": 0.6341916255435579,
"grad_norm": 0.5185050964355469,
"learning_rate": 1.829220620571708e-05,
"loss": 0.6489,
"step": 64900
},
{
"epoch": 0.6351688083255973,
"grad_norm": 1.0611315965652466,
"learning_rate": 1.8243342291717564e-05,
"loss": 0.6262,
"step": 65000
},
{
"epoch": 0.6361459911076367,
"grad_norm": 0.5177842974662781,
"learning_rate": 1.8194478377718056e-05,
"loss": 0.6424,
"step": 65100
},
{
"epoch": 0.6371231738896761,
"grad_norm": 0.6148577928543091,
"learning_rate": 1.8145614463718544e-05,
"loss": 0.6402,
"step": 65200
},
{
"epoch": 0.6381003566717155,
"grad_norm": 0.686576247215271,
"learning_rate": 1.8096750549719032e-05,
"loss": 0.6361,
"step": 65300
},
{
"epoch": 0.6390775394537548,
"grad_norm": 1.5292381048202515,
"learning_rate": 1.804788663571952e-05,
"loss": 0.6263,
"step": 65400
},
{
"epoch": 0.6400547222357942,
"grad_norm": 0.7201911807060242,
"learning_rate": 1.799902272172001e-05,
"loss": 0.6402,
"step": 65500
},
{
"epoch": 0.6410319050178336,
"grad_norm": 0.7407404184341431,
"learning_rate": 1.79501588077205e-05,
"loss": 0.6149,
"step": 65600
},
{
"epoch": 0.642009087799873,
"grad_norm": 0.7911986708641052,
"learning_rate": 1.7901294893720988e-05,
"loss": 0.6273,
"step": 65700
},
{
"epoch": 0.6429862705819124,
"grad_norm": 0.467869371175766,
"learning_rate": 1.7852430979721476e-05,
"loss": 0.6344,
"step": 65800
},
{
"epoch": 0.6439634533639518,
"grad_norm": 1.0182818174362183,
"learning_rate": 1.7803567065721967e-05,
"loss": 0.612,
"step": 65900
},
{
"epoch": 0.6449406361459911,
"grad_norm": 0.5325811505317688,
"learning_rate": 1.7754703151722455e-05,
"loss": 0.6427,
"step": 66000
},
{
"epoch": 0.6459178189280305,
"grad_norm": 1.1324542760849,
"learning_rate": 1.7705839237722943e-05,
"loss": 0.6161,
"step": 66100
},
{
"epoch": 0.6468950017100699,
"grad_norm": 0.7836804389953613,
"learning_rate": 1.765697532372343e-05,
"loss": 0.632,
"step": 66200
},
{
"epoch": 0.6478721844921093,
"grad_norm": 0.6157903075218201,
"learning_rate": 1.760811140972392e-05,
"loss": 0.6497,
"step": 66300
},
{
"epoch": 0.6488493672741487,
"grad_norm": 0.776150643825531,
"learning_rate": 1.755924749572441e-05,
"loss": 0.5929,
"step": 66400
},
{
"epoch": 0.6498265500561881,
"grad_norm": 0.6307646036148071,
"learning_rate": 1.7510383581724896e-05,
"loss": 0.66,
"step": 66500
},
{
"epoch": 0.6508037328382273,
"grad_norm": 0.5305992364883423,
"learning_rate": 1.7461519667725384e-05,
"loss": 0.5985,
"step": 66600
},
{
"epoch": 0.6517809156202667,
"grad_norm": 0.6581500172615051,
"learning_rate": 1.7412655753725872e-05,
"loss": 0.6393,
"step": 66700
},
{
"epoch": 0.6527580984023061,
"grad_norm": 1.0988273620605469,
"learning_rate": 1.7363791839726363e-05,
"loss": 0.6453,
"step": 66800
},
{
"epoch": 0.6537352811843455,
"grad_norm": 0.6662785410881042,
"learning_rate": 1.731492792572685e-05,
"loss": 0.6831,
"step": 66900
},
{
"epoch": 0.6547124639663849,
"grad_norm": 0.5156288743019104,
"learning_rate": 1.726606401172734e-05,
"loss": 0.647,
"step": 67000
},
{
"epoch": 0.6556896467484243,
"grad_norm": 0.8832482695579529,
"learning_rate": 1.7217200097727827e-05,
"loss": 0.6263,
"step": 67100
},
{
"epoch": 0.6566668295304636,
"grad_norm": 0.8194277882575989,
"learning_rate": 1.716833618372832e-05,
"loss": 0.6293,
"step": 67200
},
{
"epoch": 0.657644012312503,
"grad_norm": 0.5544142127037048,
"learning_rate": 1.7119472269728807e-05,
"loss": 0.6207,
"step": 67300
},
{
"epoch": 0.6586211950945424,
"grad_norm": 1.0161030292510986,
"learning_rate": 1.7070608355729295e-05,
"loss": 0.6166,
"step": 67400
},
{
"epoch": 0.6595983778765818,
"grad_norm": 1.1273646354675293,
"learning_rate": 1.7021744441729783e-05,
"loss": 0.6326,
"step": 67500
},
{
"epoch": 0.6605755606586212,
"grad_norm": 0.5743687748908997,
"learning_rate": 1.697288052773027e-05,
"loss": 0.5943,
"step": 67600
},
{
"epoch": 0.6615527434406606,
"grad_norm": 0.5743625164031982,
"learning_rate": 1.6924016613730763e-05,
"loss": 0.6337,
"step": 67700
},
{
"epoch": 0.6625299262226999,
"grad_norm": 0.47358232736587524,
"learning_rate": 1.687515269973125e-05,
"loss": 0.6272,
"step": 67800
},
{
"epoch": 0.6635071090047393,
"grad_norm": 0.7825568318367004,
"learning_rate": 1.682628878573174e-05,
"loss": 0.6407,
"step": 67900
},
{
"epoch": 0.6644842917867787,
"grad_norm": 1.0739299058914185,
"learning_rate": 1.6777424871732227e-05,
"loss": 0.6213,
"step": 68000
},
{
"epoch": 0.6654614745688181,
"grad_norm": 0.6242460608482361,
"learning_rate": 1.6728560957732715e-05,
"loss": 0.6247,
"step": 68100
},
{
"epoch": 0.6664386573508575,
"grad_norm": 0.674392580986023,
"learning_rate": 1.6679697043733203e-05,
"loss": 0.6405,
"step": 68200
},
{
"epoch": 0.6674158401328969,
"grad_norm": 0.4114531874656677,
"learning_rate": 1.663083312973369e-05,
"loss": 0.6235,
"step": 68300
},
{
"epoch": 0.6683930229149362,
"grad_norm": 0.5812088847160339,
"learning_rate": 1.658196921573418e-05,
"loss": 0.6175,
"step": 68400
},
{
"epoch": 0.6693702056969756,
"grad_norm": 0.48696669936180115,
"learning_rate": 1.653310530173467e-05,
"loss": 0.6264,
"step": 68500
},
{
"epoch": 0.670347388479015,
"grad_norm": 0.5733768939971924,
"learning_rate": 1.648424138773516e-05,
"loss": 0.6371,
"step": 68600
},
{
"epoch": 0.6713245712610544,
"grad_norm": 0.9609115123748779,
"learning_rate": 1.6435377473735647e-05,
"loss": 0.618,
"step": 68700
},
{
"epoch": 0.6723017540430938,
"grad_norm": 1.226388692855835,
"learning_rate": 1.6386513559736135e-05,
"loss": 0.6499,
"step": 68800
},
{
"epoch": 0.6732789368251332,
"grad_norm": 0.6776556372642517,
"learning_rate": 1.6337649645736626e-05,
"loss": 0.6356,
"step": 68900
},
{
"epoch": 0.6742561196071726,
"grad_norm": 0.6129021644592285,
"learning_rate": 1.6288785731737114e-05,
"loss": 0.6133,
"step": 69000
},
{
"epoch": 0.6752333023892119,
"grad_norm": 1.4161570072174072,
"learning_rate": 1.6239921817737602e-05,
"loss": 0.6419,
"step": 69100
},
{
"epoch": 0.6762104851712513,
"grad_norm": 0.5857706665992737,
"learning_rate": 1.619105790373809e-05,
"loss": 0.6227,
"step": 69200
},
{
"epoch": 0.6771876679532907,
"grad_norm": 0.933807909488678,
"learning_rate": 1.614219398973858e-05,
"loss": 0.6392,
"step": 69300
},
{
"epoch": 0.6781648507353301,
"grad_norm": 0.9411168098449707,
"learning_rate": 1.609333007573907e-05,
"loss": 0.649,
"step": 69400
},
{
"epoch": 0.6791420335173695,
"grad_norm": 0.5923060178756714,
"learning_rate": 1.6044466161739554e-05,
"loss": 0.6286,
"step": 69500
},
{
"epoch": 0.6801192162994089,
"grad_norm": 0.744339108467102,
"learning_rate": 1.5995602247740043e-05,
"loss": 0.6178,
"step": 69600
},
{
"epoch": 0.6810963990814481,
"grad_norm": 1.0202040672302246,
"learning_rate": 1.5946738333740534e-05,
"loss": 0.6254,
"step": 69700
},
{
"epoch": 0.6820735818634875,
"grad_norm": 0.8653994798660278,
"learning_rate": 1.5897874419741022e-05,
"loss": 0.6214,
"step": 69800
},
{
"epoch": 0.6830507646455269,
"grad_norm": 0.4566790461540222,
"learning_rate": 1.584901050574151e-05,
"loss": 0.6517,
"step": 69900
},
{
"epoch": 0.6840279474275663,
"grad_norm": 0.9629371166229248,
"learning_rate": 1.5800146591741998e-05,
"loss": 0.6359,
"step": 70000
},
{
"epoch": 0.6850051302096057,
"grad_norm": 0.7253994941711426,
"learning_rate": 1.5751282677742486e-05,
"loss": 0.6405,
"step": 70100
},
{
"epoch": 0.6859823129916451,
"grad_norm": 0.8287329077720642,
"learning_rate": 1.5702418763742978e-05,
"loss": 0.6085,
"step": 70200
},
{
"epoch": 0.6869594957736844,
"grad_norm": 0.5002869367599487,
"learning_rate": 1.5653554849743466e-05,
"loss": 0.6255,
"step": 70300
},
{
"epoch": 0.6879366785557238,
"grad_norm": 0.4376012682914734,
"learning_rate": 1.5604690935743954e-05,
"loss": 0.5933,
"step": 70400
},
{
"epoch": 0.6889138613377632,
"grad_norm": 0.756737232208252,
"learning_rate": 1.5555827021744442e-05,
"loss": 0.609,
"step": 70500
},
{
"epoch": 0.6898910441198026,
"grad_norm": 1.1462029218673706,
"learning_rate": 1.5506963107744933e-05,
"loss": 0.6349,
"step": 70600
},
{
"epoch": 0.690868226901842,
"grad_norm": 0.5806009769439697,
"learning_rate": 1.545809919374542e-05,
"loss": 0.6242,
"step": 70700
},
{
"epoch": 0.6918454096838814,
"grad_norm": 0.41798803210258484,
"learning_rate": 1.540923527974591e-05,
"loss": 0.6688,
"step": 70800
},
{
"epoch": 0.6928225924659207,
"grad_norm": 0.5598849058151245,
"learning_rate": 1.5360371365746398e-05,
"loss": 0.6371,
"step": 70900
},
{
"epoch": 0.6937997752479601,
"grad_norm": 1.0417990684509277,
"learning_rate": 1.5311507451746886e-05,
"loss": 0.5966,
"step": 71000
},
{
"epoch": 0.6947769580299995,
"grad_norm": 0.5547340512275696,
"learning_rate": 1.5262643537747374e-05,
"loss": 0.6221,
"step": 71100
},
{
"epoch": 0.6957541408120389,
"grad_norm": 0.4499816298484802,
"learning_rate": 1.5213779623747862e-05,
"loss": 0.6194,
"step": 71200
},
{
"epoch": 0.6967313235940783,
"grad_norm": 2.521627902984619,
"learning_rate": 1.5164915709748351e-05,
"loss": 0.6279,
"step": 71300
},
{
"epoch": 0.6977085063761177,
"grad_norm": 1.0940284729003906,
"learning_rate": 1.511605179574884e-05,
"loss": 0.6376,
"step": 71400
},
{
"epoch": 0.698685689158157,
"grad_norm": 0.515785276889801,
"learning_rate": 1.5067187881749328e-05,
"loss": 0.6046,
"step": 71500
},
{
"epoch": 0.6996628719401964,
"grad_norm": 0.5034206509590149,
"learning_rate": 1.5018323967749817e-05,
"loss": 0.6036,
"step": 71600
},
{
"epoch": 0.7006400547222358,
"grad_norm": 0.6637565493583679,
"learning_rate": 1.4969460053750305e-05,
"loss": 0.6288,
"step": 71700
},
{
"epoch": 0.7016172375042752,
"grad_norm": 0.7677326202392578,
"learning_rate": 1.4920596139750795e-05,
"loss": 0.655,
"step": 71800
},
{
"epoch": 0.7025944202863146,
"grad_norm": 0.6796774864196777,
"learning_rate": 1.4871732225751283e-05,
"loss": 0.5955,
"step": 71900
},
{
"epoch": 0.703571603068354,
"grad_norm": 0.9217430353164673,
"learning_rate": 1.4822868311751773e-05,
"loss": 0.6268,
"step": 72000
},
{
"epoch": 0.7045487858503933,
"grad_norm": 0.846118688583374,
"learning_rate": 1.4774004397752261e-05,
"loss": 0.6345,
"step": 72100
},
{
"epoch": 0.7055259686324327,
"grad_norm": 0.7406280040740967,
"learning_rate": 1.472514048375275e-05,
"loss": 0.631,
"step": 72200
},
{
"epoch": 0.7065031514144721,
"grad_norm": 0.8265899419784546,
"learning_rate": 1.4676276569753239e-05,
"loss": 0.6135,
"step": 72300
},
{
"epoch": 0.7074803341965115,
"grad_norm": 0.7813581228256226,
"learning_rate": 1.4627412655753727e-05,
"loss": 0.6448,
"step": 72400
},
{
"epoch": 0.7084575169785509,
"grad_norm": 0.4718623757362366,
"learning_rate": 1.4578548741754217e-05,
"loss": 0.5952,
"step": 72500
},
{
"epoch": 0.7094346997605903,
"grad_norm": 2.193324565887451,
"learning_rate": 1.4529684827754703e-05,
"loss": 0.6199,
"step": 72600
},
{
"epoch": 0.7104118825426295,
"grad_norm": 1.0357561111450195,
"learning_rate": 1.4480820913755191e-05,
"loss": 0.6342,
"step": 72700
},
{
"epoch": 0.711389065324669,
"grad_norm": 1.0319572687149048,
"learning_rate": 1.4431956999755681e-05,
"loss": 0.5836,
"step": 72800
},
{
"epoch": 0.7123662481067083,
"grad_norm": 1.0852116346359253,
"learning_rate": 1.4383093085756169e-05,
"loss": 0.6246,
"step": 72900
},
{
"epoch": 0.7133434308887477,
"grad_norm": 0.5591370463371277,
"learning_rate": 1.4334229171756659e-05,
"loss": 0.6022,
"step": 73000
},
{
"epoch": 0.7143206136707871,
"grad_norm": 1.129408836364746,
"learning_rate": 1.4285365257757147e-05,
"loss": 0.6414,
"step": 73100
},
{
"epoch": 0.7152977964528265,
"grad_norm": 0.9241653680801392,
"learning_rate": 1.4236501343757635e-05,
"loss": 0.5954,
"step": 73200
},
{
"epoch": 0.7162749792348658,
"grad_norm": 0.5140904188156128,
"learning_rate": 1.4187637429758125e-05,
"loss": 0.6499,
"step": 73300
},
{
"epoch": 0.7172521620169052,
"grad_norm": 0.8134740591049194,
"learning_rate": 1.4138773515758613e-05,
"loss": 0.6199,
"step": 73400
},
{
"epoch": 0.7182293447989446,
"grad_norm": 0.8259909749031067,
"learning_rate": 1.4089909601759102e-05,
"loss": 0.6181,
"step": 73500
},
{
"epoch": 0.719206527580984,
"grad_norm": 0.7081485390663147,
"learning_rate": 1.404104568775959e-05,
"loss": 0.6056,
"step": 73600
},
{
"epoch": 0.7201837103630234,
"grad_norm": 0.7906745076179504,
"learning_rate": 1.399218177376008e-05,
"loss": 0.6341,
"step": 73700
},
{
"epoch": 0.7211608931450628,
"grad_norm": 0.5661380290985107,
"learning_rate": 1.3943317859760568e-05,
"loss": 0.621,
"step": 73800
},
{
"epoch": 0.7221380759271022,
"grad_norm": 1.0971596240997314,
"learning_rate": 1.3894453945761058e-05,
"loss": 0.6261,
"step": 73900
},
{
"epoch": 0.7231152587091415,
"grad_norm": 1.6842643022537231,
"learning_rate": 1.3845590031761546e-05,
"loss": 0.6065,
"step": 74000
},
{
"epoch": 0.7240924414911809,
"grad_norm": 1.0033600330352783,
"learning_rate": 1.3796726117762033e-05,
"loss": 0.6364,
"step": 74100
},
{
"epoch": 0.7250696242732203,
"grad_norm": 0.8704243898391724,
"learning_rate": 1.374786220376252e-05,
"loss": 0.6259,
"step": 74200
},
{
"epoch": 0.7260468070552597,
"grad_norm": 0.855398416519165,
"learning_rate": 1.369899828976301e-05,
"loss": 0.653,
"step": 74300
},
{
"epoch": 0.7270239898372991,
"grad_norm": 1.733904480934143,
"learning_rate": 1.3650134375763498e-05,
"loss": 0.6284,
"step": 74400
},
{
"epoch": 0.7280011726193385,
"grad_norm": 0.49585819244384766,
"learning_rate": 1.3601270461763988e-05,
"loss": 0.6165,
"step": 74500
},
{
"epoch": 0.7289783554013778,
"grad_norm": 0.5818326473236084,
"learning_rate": 1.3552406547764476e-05,
"loss": 0.6403,
"step": 74600
},
{
"epoch": 0.7299555381834172,
"grad_norm": 0.8778244853019714,
"learning_rate": 1.3503542633764964e-05,
"loss": 0.5963,
"step": 74700
},
{
"epoch": 0.7309327209654566,
"grad_norm": 0.6378918290138245,
"learning_rate": 1.3454678719765454e-05,
"loss": 0.6242,
"step": 74800
},
{
"epoch": 0.731909903747496,
"grad_norm": 0.792775571346283,
"learning_rate": 1.3405814805765942e-05,
"loss": 0.6348,
"step": 74900
},
{
"epoch": 0.7328870865295354,
"grad_norm": 0.8906835317611694,
"learning_rate": 1.3356950891766432e-05,
"loss": 0.6074,
"step": 75000
},
{
"epoch": 0.7338642693115748,
"grad_norm": 0.7266893982887268,
"learning_rate": 1.330808697776692e-05,
"loss": 0.6253,
"step": 75100
},
{
"epoch": 0.7348414520936141,
"grad_norm": 0.6896129250526428,
"learning_rate": 1.325922306376741e-05,
"loss": 0.6273,
"step": 75200
},
{
"epoch": 0.7358186348756535,
"grad_norm": 1.0812867879867554,
"learning_rate": 1.3210359149767898e-05,
"loss": 0.6474,
"step": 75300
},
{
"epoch": 0.7367958176576929,
"grad_norm": 0.6664975881576538,
"learning_rate": 1.3161495235768388e-05,
"loss": 0.6114,
"step": 75400
},
{
"epoch": 0.7377730004397323,
"grad_norm": 0.6565041542053223,
"learning_rate": 1.3112631321768876e-05,
"loss": 0.6059,
"step": 75500
},
{
"epoch": 0.7387501832217717,
"grad_norm": 0.5191747546195984,
"learning_rate": 1.3063767407769362e-05,
"loss": 0.6,
"step": 75600
},
{
"epoch": 0.7397273660038111,
"grad_norm": 0.9525347948074341,
"learning_rate": 1.301490349376985e-05,
"loss": 0.6032,
"step": 75700
},
{
"epoch": 0.7407045487858503,
"grad_norm": 1.1167237758636475,
"learning_rate": 1.296603957977034e-05,
"loss": 0.6095,
"step": 75800
},
{
"epoch": 0.7416817315678897,
"grad_norm": 0.8300033807754517,
"learning_rate": 1.2917175665770828e-05,
"loss": 0.6246,
"step": 75900
},
{
"epoch": 0.7426589143499291,
"grad_norm": 0.7098196148872375,
"learning_rate": 1.2868311751771318e-05,
"loss": 0.6188,
"step": 76000
},
{
"epoch": 0.7436360971319685,
"grad_norm": 0.42002958059310913,
"learning_rate": 1.2819447837771806e-05,
"loss": 0.5943,
"step": 76100
},
{
"epoch": 0.7446132799140079,
"grad_norm": 0.7477664947509766,
"learning_rate": 1.2770583923772295e-05,
"loss": 0.6368,
"step": 76200
},
{
"epoch": 0.7455904626960473,
"grad_norm": 1.2381956577301025,
"learning_rate": 1.2721720009772783e-05,
"loss": 0.6528,
"step": 76300
},
{
"epoch": 0.7465676454780866,
"grad_norm": 0.46650367975234985,
"learning_rate": 1.2672856095773272e-05,
"loss": 0.6062,
"step": 76400
},
{
"epoch": 0.747544828260126,
"grad_norm": 0.9223760366439819,
"learning_rate": 1.2623992181773761e-05,
"loss": 0.6386,
"step": 76500
},
{
"epoch": 0.7485220110421654,
"grad_norm": 0.6782642602920532,
"learning_rate": 1.257512826777425e-05,
"loss": 0.5926,
"step": 76600
},
{
"epoch": 0.7494991938242048,
"grad_norm": 0.8533148765563965,
"learning_rate": 1.2526264353774739e-05,
"loss": 0.6076,
"step": 76700
},
{
"epoch": 0.7504763766062442,
"grad_norm": 0.6998764276504517,
"learning_rate": 1.2477400439775225e-05,
"loss": 0.6136,
"step": 76800
},
{
"epoch": 0.7514535593882836,
"grad_norm": 0.4632514715194702,
"learning_rate": 1.2428536525775715e-05,
"loss": 0.6174,
"step": 76900
},
{
"epoch": 0.7524307421703229,
"grad_norm": 0.6624991297721863,
"learning_rate": 1.2379672611776203e-05,
"loss": 0.6053,
"step": 77000
},
{
"epoch": 0.7534079249523623,
"grad_norm": 0.8521330952644348,
"learning_rate": 1.2330808697776693e-05,
"loss": 0.6261,
"step": 77100
},
{
"epoch": 0.7543851077344017,
"grad_norm": 0.6917625665664673,
"learning_rate": 1.2281944783777181e-05,
"loss": 0.6049,
"step": 77200
},
{
"epoch": 0.7553622905164411,
"grad_norm": 0.4985372722148895,
"learning_rate": 1.2233080869777671e-05,
"loss": 0.6057,
"step": 77300
},
{
"epoch": 0.7563394732984805,
"grad_norm": 0.6484245657920837,
"learning_rate": 1.2184216955778159e-05,
"loss": 0.602,
"step": 77400
},
{
"epoch": 0.7573166560805199,
"grad_norm": 0.7993507981300354,
"learning_rate": 1.2135353041778647e-05,
"loss": 0.5809,
"step": 77500
},
{
"epoch": 0.7582938388625592,
"grad_norm": 0.6944275498390198,
"learning_rate": 1.2086489127779135e-05,
"loss": 0.5959,
"step": 77600
},
{
"epoch": 0.7592710216445986,
"grad_norm": 0.6688080430030823,
"learning_rate": 1.2037625213779625e-05,
"loss": 0.6038,
"step": 77700
},
{
"epoch": 0.760248204426638,
"grad_norm": 0.8234009742736816,
"learning_rate": 1.1988761299780113e-05,
"loss": 0.6287,
"step": 77800
},
{
"epoch": 0.7612253872086774,
"grad_norm": 1.0987696647644043,
"learning_rate": 1.1939897385780601e-05,
"loss": 0.631,
"step": 77900
},
{
"epoch": 0.7622025699907168,
"grad_norm": 0.7760794758796692,
"learning_rate": 1.189103347178109e-05,
"loss": 0.6356,
"step": 78000
},
{
"epoch": 0.7631797527727562,
"grad_norm": 1.422297716140747,
"learning_rate": 1.1842169557781579e-05,
"loss": 0.5983,
"step": 78100
},
{
"epoch": 0.7641569355547956,
"grad_norm": 0.7743082046508789,
"learning_rate": 1.1793305643782067e-05,
"loss": 0.6132,
"step": 78200
},
{
"epoch": 0.7651341183368349,
"grad_norm": 1.0263071060180664,
"learning_rate": 1.1744441729782555e-05,
"loss": 0.6364,
"step": 78300
},
{
"epoch": 0.7661113011188743,
"grad_norm": 0.49797773361206055,
"learning_rate": 1.1695577815783045e-05,
"loss": 0.6384,
"step": 78400
},
{
"epoch": 0.7670884839009137,
"grad_norm": 0.58949214220047,
"learning_rate": 1.1646713901783533e-05,
"loss": 0.6176,
"step": 78500
},
{
"epoch": 0.7680656666829531,
"grad_norm": 0.8523328304290771,
"learning_rate": 1.1597849987784022e-05,
"loss": 0.6238,
"step": 78600
},
{
"epoch": 0.7690428494649925,
"grad_norm": 2.231853723526001,
"learning_rate": 1.154898607378451e-05,
"loss": 0.6553,
"step": 78700
},
{
"epoch": 0.7700200322470319,
"grad_norm": 0.7179421782493591,
"learning_rate": 1.1500122159785e-05,
"loss": 0.6222,
"step": 78800
},
{
"epoch": 0.7709972150290711,
"grad_norm": 0.7334624528884888,
"learning_rate": 1.1451258245785488e-05,
"loss": 0.6513,
"step": 78900
},
{
"epoch": 0.7719743978111105,
"grad_norm": 0.8650888204574585,
"learning_rate": 1.1402394331785976e-05,
"loss": 0.6382,
"step": 79000
},
{
"epoch": 0.77295158059315,
"grad_norm": 1.277421474456787,
"learning_rate": 1.1353530417786465e-05,
"loss": 0.6032,
"step": 79100
},
{
"epoch": 0.7739287633751893,
"grad_norm": 0.4764556288719177,
"learning_rate": 1.1304666503786954e-05,
"loss": 0.5852,
"step": 79200
},
{
"epoch": 0.7749059461572287,
"grad_norm": 0.7180933952331543,
"learning_rate": 1.1255802589787442e-05,
"loss": 0.6271,
"step": 79300
},
{
"epoch": 0.7758831289392681,
"grad_norm": 0.6978940367698669,
"learning_rate": 1.1206938675787932e-05,
"loss": 0.6252,
"step": 79400
},
{
"epoch": 0.7768603117213074,
"grad_norm": 0.9205247759819031,
"learning_rate": 1.115807476178842e-05,
"loss": 0.6227,
"step": 79500
},
{
"epoch": 0.7778374945033468,
"grad_norm": 0.6126120686531067,
"learning_rate": 1.1109210847788908e-05,
"loss": 0.6164,
"step": 79600
},
{
"epoch": 0.7788146772853862,
"grad_norm": 0.660234808921814,
"learning_rate": 1.1060346933789396e-05,
"loss": 0.6336,
"step": 79700
},
{
"epoch": 0.7797918600674256,
"grad_norm": 0.5239884257316589,
"learning_rate": 1.1011483019789886e-05,
"loss": 0.6324,
"step": 79800
},
{
"epoch": 0.780769042849465,
"grad_norm": 0.6763221621513367,
"learning_rate": 1.0962619105790374e-05,
"loss": 0.6063,
"step": 79900
},
{
"epoch": 0.7817462256315044,
"grad_norm": 0.6201728582382202,
"learning_rate": 1.0913755191790862e-05,
"loss": 0.6168,
"step": 80000
},
{
"epoch": 0.7827234084135437,
"grad_norm": 0.8859091997146606,
"learning_rate": 1.0864891277791352e-05,
"loss": 0.593,
"step": 80100
},
{
"epoch": 0.7837005911955831,
"grad_norm": 0.7334877848625183,
"learning_rate": 1.081602736379184e-05,
"loss": 0.6225,
"step": 80200
},
{
"epoch": 0.7846777739776225,
"grad_norm": 0.49573615193367004,
"learning_rate": 1.076716344979233e-05,
"loss": 0.6007,
"step": 80300
},
{
"epoch": 0.7856549567596619,
"grad_norm": 1.1509833335876465,
"learning_rate": 1.0718299535792818e-05,
"loss": 0.587,
"step": 80400
},
{
"epoch": 0.7866321395417013,
"grad_norm": 0.6591099500656128,
"learning_rate": 1.0669435621793306e-05,
"loss": 0.6462,
"step": 80500
},
{
"epoch": 0.7876093223237407,
"grad_norm": 0.7265052199363708,
"learning_rate": 1.0620571707793794e-05,
"loss": 0.6183,
"step": 80600
},
{
"epoch": 0.78858650510578,
"grad_norm": 1.2156593799591064,
"learning_rate": 1.0571707793794284e-05,
"loss": 0.5811,
"step": 80700
},
{
"epoch": 0.7895636878878194,
"grad_norm": 0.960753858089447,
"learning_rate": 1.0522843879794772e-05,
"loss": 0.6054,
"step": 80800
},
{
"epoch": 0.7905408706698588,
"grad_norm": 1.5062034130096436,
"learning_rate": 1.0473979965795262e-05,
"loss": 0.599,
"step": 80900
},
{
"epoch": 0.7915180534518982,
"grad_norm": 0.7047529816627502,
"learning_rate": 1.042511605179575e-05,
"loss": 0.6149,
"step": 81000
},
{
"epoch": 0.7924952362339376,
"grad_norm": 0.4432947337627411,
"learning_rate": 1.037625213779624e-05,
"loss": 0.6182,
"step": 81100
},
{
"epoch": 0.793472419015977,
"grad_norm": 0.6442515850067139,
"learning_rate": 1.0327388223796726e-05,
"loss": 0.5864,
"step": 81200
},
{
"epoch": 0.7944496017980163,
"grad_norm": 1.2354743480682373,
"learning_rate": 1.0278524309797215e-05,
"loss": 0.6068,
"step": 81300
},
{
"epoch": 0.7954267845800557,
"grad_norm": 0.7862667441368103,
"learning_rate": 1.0229660395797704e-05,
"loss": 0.6072,
"step": 81400
},
{
"epoch": 0.7964039673620951,
"grad_norm": 0.5142656564712524,
"learning_rate": 1.0180796481798192e-05,
"loss": 0.6009,
"step": 81500
},
{
"epoch": 0.7973811501441345,
"grad_norm": 0.8478522300720215,
"learning_rate": 1.0131932567798681e-05,
"loss": 0.5979,
"step": 81600
},
{
"epoch": 0.7983583329261739,
"grad_norm": 0.5929884910583496,
"learning_rate": 1.008306865379917e-05,
"loss": 0.6076,
"step": 81700
},
{
"epoch": 0.7993355157082133,
"grad_norm": 0.8067489862442017,
"learning_rate": 1.003420473979966e-05,
"loss": 0.6123,
"step": 81800
},
{
"epoch": 0.8003126984902525,
"grad_norm": 1.3287664651870728,
"learning_rate": 9.985340825800147e-06,
"loss": 0.6151,
"step": 81900
},
{
"epoch": 0.801289881272292,
"grad_norm": 0.7158493995666504,
"learning_rate": 9.936476911800635e-06,
"loss": 0.5906,
"step": 82000
},
{
"epoch": 0.8022670640543313,
"grad_norm": 0.7307409644126892,
"learning_rate": 9.887612997801123e-06,
"loss": 0.6165,
"step": 82100
},
{
"epoch": 0.8032442468363707,
"grad_norm": 0.6903741359710693,
"learning_rate": 9.838749083801613e-06,
"loss": 0.6175,
"step": 82200
},
{
"epoch": 0.8042214296184101,
"grad_norm": 0.7754660248756409,
"learning_rate": 9.789885169802101e-06,
"loss": 0.6349,
"step": 82300
},
{
"epoch": 0.8051986124004495,
"grad_norm": 0.7808040976524353,
"learning_rate": 9.741021255802591e-06,
"loss": 0.5909,
"step": 82400
},
{
"epoch": 0.8061757951824888,
"grad_norm": 0.8575007915496826,
"learning_rate": 9.692157341803079e-06,
"loss": 0.5861,
"step": 82500
},
{
"epoch": 0.8071529779645282,
"grad_norm": 1.18577241897583,
"learning_rate": 9.643293427803569e-06,
"loss": 0.6137,
"step": 82600
},
{
"epoch": 0.8081301607465676,
"grad_norm": 0.7913909554481506,
"learning_rate": 9.594429513804057e-06,
"loss": 0.6077,
"step": 82700
},
{
"epoch": 0.809107343528607,
"grad_norm": 0.8221011161804199,
"learning_rate": 9.545565599804545e-06,
"loss": 0.5946,
"step": 82800
},
{
"epoch": 0.8100845263106464,
"grad_norm": 0.7047521471977234,
"learning_rate": 9.496701685805033e-06,
"loss": 0.5973,
"step": 82900
},
{
"epoch": 0.8110617090926858,
"grad_norm": 0.5717597007751465,
"learning_rate": 9.447837771805523e-06,
"loss": 0.6236,
"step": 83000
},
{
"epoch": 0.8120388918747252,
"grad_norm": 0.93315190076828,
"learning_rate": 9.39897385780601e-06,
"loss": 0.6335,
"step": 83100
},
{
"epoch": 0.8130160746567645,
"grad_norm": 0.7691722512245178,
"learning_rate": 9.350109943806499e-06,
"loss": 0.5986,
"step": 83200
},
{
"epoch": 0.8139932574388039,
"grad_norm": 0.8947746157646179,
"learning_rate": 9.301246029806989e-06,
"loss": 0.5995,
"step": 83300
},
{
"epoch": 0.8149704402208433,
"grad_norm": 0.8654600381851196,
"learning_rate": 9.252382115807477e-06,
"loss": 0.5844,
"step": 83400
},
{
"epoch": 0.8159476230028827,
"grad_norm": 0.6563751697540283,
"learning_rate": 9.203518201807965e-06,
"loss": 0.588,
"step": 83500
},
{
"epoch": 0.8169248057849221,
"grad_norm": 0.756237804889679,
"learning_rate": 9.154654287808453e-06,
"loss": 0.5814,
"step": 83600
},
{
"epoch": 0.8179019885669615,
"grad_norm": 1.106650948524475,
"learning_rate": 9.105790373808943e-06,
"loss": 0.5924,
"step": 83700
},
{
"epoch": 0.8188791713490008,
"grad_norm": 0.39193272590637207,
"learning_rate": 9.05692645980943e-06,
"loss": 0.6048,
"step": 83800
},
{
"epoch": 0.8198563541310402,
"grad_norm": 0.7022530436515808,
"learning_rate": 9.00806254580992e-06,
"loss": 0.624,
"step": 83900
},
{
"epoch": 0.8208335369130796,
"grad_norm": 0.7286639213562012,
"learning_rate": 8.959198631810408e-06,
"loss": 0.5825,
"step": 84000
},
{
"epoch": 0.821810719695119,
"grad_norm": 0.9062661528587341,
"learning_rate": 8.910334717810898e-06,
"loss": 0.6024,
"step": 84100
},
{
"epoch": 0.8227879024771584,
"grad_norm": 1.0051745176315308,
"learning_rate": 8.861470803811386e-06,
"loss": 0.5881,
"step": 84200
},
{
"epoch": 0.8237650852591978,
"grad_norm": 0.5622514486312866,
"learning_rate": 8.812606889811874e-06,
"loss": 0.625,
"step": 84300
},
{
"epoch": 0.8247422680412371,
"grad_norm": 0.80225670337677,
"learning_rate": 8.763742975812362e-06,
"loss": 0.6142,
"step": 84400
},
{
"epoch": 0.8257194508232765,
"grad_norm": 0.7154406905174255,
"learning_rate": 8.714879061812852e-06,
"loss": 0.6009,
"step": 84500
},
{
"epoch": 0.8266966336053159,
"grad_norm": 0.8191014528274536,
"learning_rate": 8.66601514781334e-06,
"loss": 0.6054,
"step": 84600
},
{
"epoch": 0.8276738163873553,
"grad_norm": 1.4982640743255615,
"learning_rate": 8.617151233813828e-06,
"loss": 0.5917,
"step": 84700
},
{
"epoch": 0.8286509991693947,
"grad_norm": 0.6662930250167847,
"learning_rate": 8.568287319814318e-06,
"loss": 0.6047,
"step": 84800
},
{
"epoch": 0.8296281819514341,
"grad_norm": 0.8533642888069153,
"learning_rate": 8.519423405814806e-06,
"loss": 0.6275,
"step": 84900
},
{
"epoch": 0.8306053647334734,
"grad_norm": 1.0405080318450928,
"learning_rate": 8.470559491815294e-06,
"loss": 0.6325,
"step": 85000
},
{
"epoch": 0.8315825475155127,
"grad_norm": 0.3838236629962921,
"learning_rate": 8.421695577815782e-06,
"loss": 0.617,
"step": 85100
},
{
"epoch": 0.8325597302975521,
"grad_norm": 0.7229349613189697,
"learning_rate": 8.372831663816272e-06,
"loss": 0.6095,
"step": 85200
},
{
"epoch": 0.8335369130795915,
"grad_norm": 0.538932204246521,
"learning_rate": 8.32396774981676e-06,
"loss": 0.597,
"step": 85300
},
{
"epoch": 0.834514095861631,
"grad_norm": 0.9081258177757263,
"learning_rate": 8.27510383581725e-06,
"loss": 0.576,
"step": 85400
},
{
"epoch": 0.8354912786436703,
"grad_norm": 1.1647875308990479,
"learning_rate": 8.226239921817738e-06,
"loss": 0.6177,
"step": 85500
},
{
"epoch": 0.8364684614257096,
"grad_norm": 0.5544024705886841,
"learning_rate": 8.177376007818228e-06,
"loss": 0.5944,
"step": 85600
},
{
"epoch": 0.837445644207749,
"grad_norm": 0.49571287631988525,
"learning_rate": 8.128512093818716e-06,
"loss": 0.6417,
"step": 85700
},
{
"epoch": 0.8384228269897884,
"grad_norm": 0.8068299293518066,
"learning_rate": 8.079648179819204e-06,
"loss": 0.6224,
"step": 85800
},
{
"epoch": 0.8394000097718278,
"grad_norm": 0.9682297706604004,
"learning_rate": 8.030784265819692e-06,
"loss": 0.6111,
"step": 85900
},
{
"epoch": 0.8403771925538672,
"grad_norm": 1.051151990890503,
"learning_rate": 7.981920351820182e-06,
"loss": 0.6,
"step": 86000
},
{
"epoch": 0.8413543753359066,
"grad_norm": 0.568880558013916,
"learning_rate": 7.93305643782067e-06,
"loss": 0.6129,
"step": 86100
},
{
"epoch": 0.8423315581179459,
"grad_norm": 0.7681874632835388,
"learning_rate": 7.88419252382116e-06,
"loss": 0.6291,
"step": 86200
},
{
"epoch": 0.8433087408999853,
"grad_norm": 0.7521129250526428,
"learning_rate": 7.835328609821647e-06,
"loss": 0.5983,
"step": 86300
},
{
"epoch": 0.8442859236820247,
"grad_norm": 0.6910899877548218,
"learning_rate": 7.786464695822136e-06,
"loss": 0.6065,
"step": 86400
},
{
"epoch": 0.8452631064640641,
"grad_norm": 1.0774552822113037,
"learning_rate": 7.737600781822624e-06,
"loss": 0.6481,
"step": 86500
},
{
"epoch": 0.8462402892461035,
"grad_norm": 0.5744395852088928,
"learning_rate": 7.688736867823113e-06,
"loss": 0.5881,
"step": 86600
},
{
"epoch": 0.8472174720281429,
"grad_norm": 0.9754884839057922,
"learning_rate": 7.639872953823601e-06,
"loss": 0.6028,
"step": 86700
},
{
"epoch": 0.8481946548101822,
"grad_norm": 0.5664985775947571,
"learning_rate": 7.59100903982409e-06,
"loss": 0.5759,
"step": 86800
},
{
"epoch": 0.8491718375922216,
"grad_norm": 0.7173051238059998,
"learning_rate": 7.542145125824579e-06,
"loss": 0.6038,
"step": 86900
},
{
"epoch": 0.850149020374261,
"grad_norm": 0.5157271027565002,
"learning_rate": 7.493281211825068e-06,
"loss": 0.5872,
"step": 87000
},
{
"epoch": 0.8511262031563004,
"grad_norm": 2.847447156906128,
"learning_rate": 7.444417297825557e-06,
"loss": 0.6008,
"step": 87100
},
{
"epoch": 0.8521033859383398,
"grad_norm": 1.259730577468872,
"learning_rate": 7.395553383826045e-06,
"loss": 0.6047,
"step": 87200
},
{
"epoch": 0.8530805687203792,
"grad_norm": 0.5175238847732544,
"learning_rate": 7.346689469826533e-06,
"loss": 0.6294,
"step": 87300
},
{
"epoch": 0.8540577515024186,
"grad_norm": 0.5168502926826477,
"learning_rate": 7.297825555827022e-06,
"loss": 0.5987,
"step": 87400
},
{
"epoch": 0.8550349342844579,
"grad_norm": 0.7485826015472412,
"learning_rate": 7.24896164182751e-06,
"loss": 0.604,
"step": 87500
},
{
"epoch": 0.8560121170664973,
"grad_norm": 1.2643144130706787,
"learning_rate": 7.200097727827999e-06,
"loss": 0.6271,
"step": 87600
},
{
"epoch": 0.8569892998485367,
"grad_norm": 0.598031222820282,
"learning_rate": 7.151233813828488e-06,
"loss": 0.6201,
"step": 87700
},
{
"epoch": 0.8579664826305761,
"grad_norm": 0.7994399666786194,
"learning_rate": 7.102369899828977e-06,
"loss": 0.6028,
"step": 87800
},
{
"epoch": 0.8589436654126155,
"grad_norm": 0.47928521037101746,
"learning_rate": 7.053505985829466e-06,
"loss": 0.6042,
"step": 87900
},
{
"epoch": 0.8599208481946549,
"grad_norm": 0.6901227831840515,
"learning_rate": 7.004642071829953e-06,
"loss": 0.6289,
"step": 88000
},
{
"epoch": 0.8608980309766942,
"grad_norm": 0.9630447030067444,
"learning_rate": 6.955778157830442e-06,
"loss": 0.6097,
"step": 88100
},
{
"epoch": 0.8618752137587335,
"grad_norm": 0.42696672677993774,
"learning_rate": 6.906914243830931e-06,
"loss": 0.6314,
"step": 88200
},
{
"epoch": 0.862852396540773,
"grad_norm": 0.5964066982269287,
"learning_rate": 6.85805032983142e-06,
"loss": 0.5934,
"step": 88300
},
{
"epoch": 0.8638295793228123,
"grad_norm": 0.5652678608894348,
"learning_rate": 6.809186415831909e-06,
"loss": 0.6032,
"step": 88400
},
{
"epoch": 0.8648067621048517,
"grad_norm": 0.6129952669143677,
"learning_rate": 6.7603225018323976e-06,
"loss": 0.6116,
"step": 88500
},
{
"epoch": 0.8657839448868911,
"grad_norm": 0.5786252021789551,
"learning_rate": 6.7114585878328865e-06,
"loss": 0.6042,
"step": 88600
},
{
"epoch": 0.8667611276689304,
"grad_norm": 0.9830735325813293,
"learning_rate": 6.662594673833375e-06,
"loss": 0.5763,
"step": 88700
},
{
"epoch": 0.8677383104509698,
"grad_norm": 0.7167491316795349,
"learning_rate": 6.613730759833863e-06,
"loss": 0.5774,
"step": 88800
},
{
"epoch": 0.8687154932330092,
"grad_norm": 0.5763813257217407,
"learning_rate": 6.5648668458343515e-06,
"loss": 0.6219,
"step": 88900
},
{
"epoch": 0.8696926760150486,
"grad_norm": 0.552343487739563,
"learning_rate": 6.5160029318348404e-06,
"loss": 0.5983,
"step": 89000
},
{
"epoch": 0.870669858797088,
"grad_norm": 0.6471940279006958,
"learning_rate": 6.4671390178353285e-06,
"loss": 0.616,
"step": 89100
},
{
"epoch": 0.8716470415791274,
"grad_norm": 0.2821710407733917,
"learning_rate": 6.418275103835817e-06,
"loss": 0.6093,
"step": 89200
},
{
"epoch": 0.8726242243611667,
"grad_norm": 0.8784298896789551,
"learning_rate": 6.369411189836306e-06,
"loss": 0.6004,
"step": 89300
},
{
"epoch": 0.8736014071432061,
"grad_norm": 0.5774518847465515,
"learning_rate": 6.320547275836795e-06,
"loss": 0.6177,
"step": 89400
},
{
"epoch": 0.8745785899252455,
"grad_norm": 2.489976406097412,
"learning_rate": 6.2716833618372825e-06,
"loss": 0.6294,
"step": 89500
},
{
"epoch": 0.8755557727072849,
"grad_norm": 0.8063492774963379,
"learning_rate": 6.222819447837772e-06,
"loss": 0.5815,
"step": 89600
},
{
"epoch": 0.8765329554893243,
"grad_norm": 0.9328792095184326,
"learning_rate": 6.17395553383826e-06,
"loss": 0.5709,
"step": 89700
},
{
"epoch": 0.8775101382713637,
"grad_norm": 1.1980705261230469,
"learning_rate": 6.125091619838749e-06,
"loss": 0.5916,
"step": 89800
},
{
"epoch": 0.878487321053403,
"grad_norm": 0.9140294194221497,
"learning_rate": 6.076227705839238e-06,
"loss": 0.5975,
"step": 89900
},
{
"epoch": 0.8794645038354424,
"grad_norm": 0.42323464155197144,
"learning_rate": 6.027363791839727e-06,
"loss": 0.5908,
"step": 90000
},
{
"epoch": 0.8804416866174818,
"grad_norm": 0.8265115022659302,
"learning_rate": 5.978499877840215e-06,
"loss": 0.6236,
"step": 90100
},
{
"epoch": 0.8814188693995212,
"grad_norm": 0.6848395466804504,
"learning_rate": 5.929635963840704e-06,
"loss": 0.6081,
"step": 90200
},
{
"epoch": 0.8823960521815606,
"grad_norm": 0.8593265414237976,
"learning_rate": 5.880772049841193e-06,
"loss": 0.5926,
"step": 90300
},
{
"epoch": 0.8833732349636,
"grad_norm": 0.9084621667861938,
"learning_rate": 5.831908135841682e-06,
"loss": 0.5795,
"step": 90400
},
{
"epoch": 0.8843504177456393,
"grad_norm": 0.5158432126045227,
"learning_rate": 5.78304422184217e-06,
"loss": 0.5887,
"step": 90500
},
{
"epoch": 0.8853276005276787,
"grad_norm": 0.9710085988044739,
"learning_rate": 5.734180307842659e-06,
"loss": 0.5888,
"step": 90600
},
{
"epoch": 0.8863047833097181,
"grad_norm": 0.4963410794734955,
"learning_rate": 5.685316393843147e-06,
"loss": 0.5981,
"step": 90700
},
{
"epoch": 0.8872819660917575,
"grad_norm": 0.39078134298324585,
"learning_rate": 5.636452479843636e-06,
"loss": 0.5991,
"step": 90800
},
{
"epoch": 0.8882591488737969,
"grad_norm": 0.5350062847137451,
"learning_rate": 5.587588565844124e-06,
"loss": 0.5887,
"step": 90900
},
{
"epoch": 0.8892363316558363,
"grad_norm": 0.6059613823890686,
"learning_rate": 5.538724651844613e-06,
"loss": 0.6072,
"step": 91000
},
{
"epoch": 0.8902135144378756,
"grad_norm": 0.4223475158214569,
"learning_rate": 5.489860737845102e-06,
"loss": 0.5866,
"step": 91100
},
{
"epoch": 0.891190697219915,
"grad_norm": 0.8053774237632751,
"learning_rate": 5.44099682384559e-06,
"loss": 0.6031,
"step": 91200
},
{
"epoch": 0.8921678800019543,
"grad_norm": 0.8851518034934998,
"learning_rate": 5.392132909846079e-06,
"loss": 0.5766,
"step": 91300
},
{
"epoch": 0.8931450627839937,
"grad_norm": 0.6842949986457825,
"learning_rate": 5.3432689958465675e-06,
"loss": 0.5593,
"step": 91400
},
{
"epoch": 0.8941222455660331,
"grad_norm": 0.8229865431785583,
"learning_rate": 5.2944050818470564e-06,
"loss": 0.5802,
"step": 91500
},
{
"epoch": 0.8950994283480725,
"grad_norm": 0.7434598207473755,
"learning_rate": 5.2455411678475445e-06,
"loss": 0.6004,
"step": 91600
},
{
"epoch": 0.8960766111301118,
"grad_norm": 0.47747936844825745,
"learning_rate": 5.196677253848033e-06,
"loss": 0.5937,
"step": 91700
},
{
"epoch": 0.8970537939121512,
"grad_norm": 0.7917630076408386,
"learning_rate": 5.147813339848522e-06,
"loss": 0.6119,
"step": 91800
},
{
"epoch": 0.8980309766941906,
"grad_norm": 0.8409056663513184,
"learning_rate": 5.098949425849011e-06,
"loss": 0.6004,
"step": 91900
},
{
"epoch": 0.89900815947623,
"grad_norm": 0.5597165822982788,
"learning_rate": 5.050085511849499e-06,
"loss": 0.6076,
"step": 92000
},
{
"epoch": 0.8999853422582694,
"grad_norm": 0.5740428566932678,
"learning_rate": 5.001221597849988e-06,
"loss": 0.5925,
"step": 92100
},
{
"epoch": 0.9009625250403088,
"grad_norm": 0.739456832408905,
"learning_rate": 4.952357683850477e-06,
"loss": 0.5945,
"step": 92200
},
{
"epoch": 0.9019397078223482,
"grad_norm": 0.5648947954177856,
"learning_rate": 4.903493769850965e-06,
"loss": 0.5712,
"step": 92300
},
{
"epoch": 0.9029168906043875,
"grad_norm": 0.5736894607543945,
"learning_rate": 4.854629855851454e-06,
"loss": 0.6111,
"step": 92400
},
{
"epoch": 0.9038940733864269,
"grad_norm": 0.7701774835586548,
"learning_rate": 4.805765941851942e-06,
"loss": 0.599,
"step": 92500
},
{
"epoch": 0.9048712561684663,
"grad_norm": 0.7485201358795166,
"learning_rate": 4.756902027852431e-06,
"loss": 0.5842,
"step": 92600
},
{
"epoch": 0.9058484389505057,
"grad_norm": 0.6121499538421631,
"learning_rate": 4.70803811385292e-06,
"loss": 0.6198,
"step": 92700
},
{
"epoch": 0.9068256217325451,
"grad_norm": 0.7362948656082153,
"learning_rate": 4.659174199853408e-06,
"loss": 0.6123,
"step": 92800
},
{
"epoch": 0.9078028045145845,
"grad_norm": 0.606191098690033,
"learning_rate": 4.610310285853897e-06,
"loss": 0.6028,
"step": 92900
},
{
"epoch": 0.9087799872966238,
"grad_norm": 0.6618565917015076,
"learning_rate": 4.561446371854386e-06,
"loss": 0.5963,
"step": 93000
},
{
"epoch": 0.9097571700786632,
"grad_norm": 1.5052400827407837,
"learning_rate": 4.512582457854874e-06,
"loss": 0.603,
"step": 93100
},
{
"epoch": 0.9107343528607026,
"grad_norm": 0.8985777497291565,
"learning_rate": 4.463718543855363e-06,
"loss": 0.6156,
"step": 93200
},
{
"epoch": 0.911711535642742,
"grad_norm": 0.8037851452827454,
"learning_rate": 4.414854629855852e-06,
"loss": 0.6406,
"step": 93300
},
{
"epoch": 0.9126887184247814,
"grad_norm": 0.49996376037597656,
"learning_rate": 4.365990715856341e-06,
"loss": 0.6139,
"step": 93400
},
{
"epoch": 0.9136659012068208,
"grad_norm": 0.8254772424697876,
"learning_rate": 4.317126801856829e-06,
"loss": 0.6149,
"step": 93500
},
{
"epoch": 0.9146430839888601,
"grad_norm": 0.7700937390327454,
"learning_rate": 4.268262887857318e-06,
"loss": 0.5993,
"step": 93600
},
{
"epoch": 0.9156202667708995,
"grad_norm": 0.38511478900909424,
"learning_rate": 4.2193989738578065e-06,
"loss": 0.6232,
"step": 93700
},
{
"epoch": 0.9165974495529389,
"grad_norm": 0.6567879319190979,
"learning_rate": 4.1705350598582955e-06,
"loss": 0.5813,
"step": 93800
},
{
"epoch": 0.9175746323349783,
"grad_norm": 0.8876736760139465,
"learning_rate": 4.1216711458587835e-06,
"loss": 0.5938,
"step": 93900
},
{
"epoch": 0.9185518151170177,
"grad_norm": 0.41622501611709595,
"learning_rate": 4.0728072318592724e-06,
"loss": 0.579,
"step": 94000
},
{
"epoch": 0.9195289978990571,
"grad_norm": 0.7455472946166992,
"learning_rate": 4.0239433178597605e-06,
"loss": 0.6011,
"step": 94100
},
{
"epoch": 0.9205061806810964,
"grad_norm": 0.5976389646530151,
"learning_rate": 3.975079403860249e-06,
"loss": 0.6143,
"step": 94200
},
{
"epoch": 0.9214833634631358,
"grad_norm": 0.7773202657699585,
"learning_rate": 3.9262154898607375e-06,
"loss": 0.5796,
"step": 94300
},
{
"epoch": 0.9224605462451752,
"grad_norm": 0.5033147931098938,
"learning_rate": 3.877351575861226e-06,
"loss": 0.5994,
"step": 94400
},
{
"epoch": 0.9234377290272145,
"grad_norm": 0.7234833240509033,
"learning_rate": 3.828487661861715e-06,
"loss": 0.6102,
"step": 94500
},
{
"epoch": 0.924414911809254,
"grad_norm": 0.4259088635444641,
"learning_rate": 3.7796237478622038e-06,
"loss": 0.5787,
"step": 94600
},
{
"epoch": 0.9253920945912933,
"grad_norm": 0.43989598751068115,
"learning_rate": 3.7307598338626923e-06,
"loss": 0.5841,
"step": 94700
},
{
"epoch": 0.9263692773733326,
"grad_norm": 0.4430140256881714,
"learning_rate": 3.681895919863181e-06,
"loss": 0.5933,
"step": 94800
},
{
"epoch": 0.927346460155372,
"grad_norm": 0.7848074436187744,
"learning_rate": 3.63303200586367e-06,
"loss": 0.6138,
"step": 94900
},
{
"epoch": 0.9283236429374114,
"grad_norm": 0.8117037415504456,
"learning_rate": 3.584168091864158e-06,
"loss": 0.5917,
"step": 95000
},
{
"epoch": 0.9293008257194508,
"grad_norm": 0.6667145490646362,
"learning_rate": 3.535304177864647e-06,
"loss": 0.5542,
"step": 95100
},
{
"epoch": 0.9302780085014902,
"grad_norm": 0.7902615070343018,
"learning_rate": 3.486440263865136e-06,
"loss": 0.5741,
"step": 95200
},
{
"epoch": 0.9312551912835296,
"grad_norm": 0.7067260146141052,
"learning_rate": 3.4375763498656245e-06,
"loss": 0.5961,
"step": 95300
},
{
"epoch": 0.9322323740655689,
"grad_norm": 2.328338861465454,
"learning_rate": 3.388712435866113e-06,
"loss": 0.5716,
"step": 95400
},
{
"epoch": 0.9332095568476083,
"grad_norm": 1.1518771648406982,
"learning_rate": 3.3398485218666014e-06,
"loss": 0.6306,
"step": 95500
},
{
"epoch": 0.9341867396296477,
"grad_norm": 0.5183611512184143,
"learning_rate": 3.2909846078670904e-06,
"loss": 0.5998,
"step": 95600
},
{
"epoch": 0.9351639224116871,
"grad_norm": 0.6827223300933838,
"learning_rate": 3.2421206938675793e-06,
"loss": 0.5948,
"step": 95700
},
{
"epoch": 0.9361411051937265,
"grad_norm": 0.6556549668312073,
"learning_rate": 3.1932567798680673e-06,
"loss": 0.6014,
"step": 95800
},
{
"epoch": 0.9371182879757659,
"grad_norm": 0.5259923934936523,
"learning_rate": 3.1443928658685562e-06,
"loss": 0.6192,
"step": 95900
},
{
"epoch": 0.9380954707578052,
"grad_norm": 0.6890705823898315,
"learning_rate": 3.095528951869045e-06,
"loss": 0.5922,
"step": 96000
},
{
"epoch": 0.9390726535398446,
"grad_norm": 0.5739189386367798,
"learning_rate": 3.0466650378695336e-06,
"loss": 0.572,
"step": 96100
},
{
"epoch": 0.940049836321884,
"grad_norm": 0.4784778356552124,
"learning_rate": 2.997801123870022e-06,
"loss": 0.5924,
"step": 96200
},
{
"epoch": 0.9410270191039234,
"grad_norm": 0.4622921049594879,
"learning_rate": 2.9489372098705106e-06,
"loss": 0.6223,
"step": 96300
},
{
"epoch": 0.9420042018859628,
"grad_norm": 0.7146719098091125,
"learning_rate": 2.900073295870999e-06,
"loss": 0.589,
"step": 96400
},
{
"epoch": 0.9429813846680022,
"grad_norm": 0.5467257499694824,
"learning_rate": 2.851209381871488e-06,
"loss": 0.6197,
"step": 96500
},
{
"epoch": 0.9439585674500416,
"grad_norm": 0.6875296831130981,
"learning_rate": 2.8023454678719765e-06,
"loss": 0.588,
"step": 96600
},
{
"epoch": 0.9449357502320809,
"grad_norm": 0.8921650052070618,
"learning_rate": 2.7534815538724654e-06,
"loss": 0.6008,
"step": 96700
},
{
"epoch": 0.9459129330141203,
"grad_norm": 0.6401572823524475,
"learning_rate": 2.704617639872954e-06,
"loss": 0.5858,
"step": 96800
},
{
"epoch": 0.9468901157961597,
"grad_norm": 0.7191618084907532,
"learning_rate": 2.655753725873443e-06,
"loss": 0.5763,
"step": 96900
},
{
"epoch": 0.9478672985781991,
"grad_norm": 0.6186959147453308,
"learning_rate": 2.6068898118739313e-06,
"loss": 0.5695,
"step": 97000
},
{
"epoch": 0.9488444813602385,
"grad_norm": 0.36472517251968384,
"learning_rate": 2.5580258978744198e-06,
"loss": 0.5819,
"step": 97100
},
{
"epoch": 0.9498216641422779,
"grad_norm": 1.0958882570266724,
"learning_rate": 2.5091619838749083e-06,
"loss": 0.6167,
"step": 97200
},
{
"epoch": 0.9507988469243172,
"grad_norm": 0.7372691631317139,
"learning_rate": 2.460298069875397e-06,
"loss": 0.5936,
"step": 97300
},
{
"epoch": 0.9517760297063566,
"grad_norm": 0.4143502116203308,
"learning_rate": 2.4114341558758857e-06,
"loss": 0.5873,
"step": 97400
},
{
"epoch": 0.952753212488396,
"grad_norm": 1.134059190750122,
"learning_rate": 2.3625702418763746e-06,
"loss": 0.6143,
"step": 97500
},
{
"epoch": 0.9537303952704353,
"grad_norm": 0.40213558077812195,
"learning_rate": 2.313706327876863e-06,
"loss": 0.5725,
"step": 97600
},
{
"epoch": 0.9547075780524747,
"grad_norm": 0.5387831926345825,
"learning_rate": 2.264842413877352e-06,
"loss": 0.5959,
"step": 97700
},
{
"epoch": 0.9556847608345141,
"grad_norm": 0.8288729786872864,
"learning_rate": 2.2159784998778405e-06,
"loss": 0.5881,
"step": 97800
},
{
"epoch": 0.9566619436165534,
"grad_norm": 0.7433648109436035,
"learning_rate": 2.167114585878329e-06,
"loss": 0.5881,
"step": 97900
},
{
"epoch": 0.9576391263985928,
"grad_norm": 0.7633154392242432,
"learning_rate": 2.1182506718788174e-06,
"loss": 0.6218,
"step": 98000
},
{
"epoch": 0.9586163091806322,
"grad_norm": 0.5039961338043213,
"learning_rate": 2.069386757879306e-06,
"loss": 0.5973,
"step": 98100
},
{
"epoch": 0.9595934919626716,
"grad_norm": 0.9047883152961731,
"learning_rate": 2.020522843879795e-06,
"loss": 0.5741,
"step": 98200
},
{
"epoch": 0.960570674744711,
"grad_norm": 0.6591965556144714,
"learning_rate": 1.9716589298802833e-06,
"loss": 0.5914,
"step": 98300
},
{
"epoch": 0.9615478575267504,
"grad_norm": 0.6809371113777161,
"learning_rate": 1.9227950158807722e-06,
"loss": 0.5876,
"step": 98400
},
{
"epoch": 0.9625250403087897,
"grad_norm": 0.5399168133735657,
"learning_rate": 1.8739311018812607e-06,
"loss": 0.5921,
"step": 98500
},
{
"epoch": 0.9635022230908291,
"grad_norm": 0.6308420896530151,
"learning_rate": 1.8250671878817494e-06,
"loss": 0.5805,
"step": 98600
},
{
"epoch": 0.9644794058728685,
"grad_norm": 0.8909119963645935,
"learning_rate": 1.776203273882238e-06,
"loss": 0.6062,
"step": 98700
},
{
"epoch": 0.9654565886549079,
"grad_norm": 0.5217241048812866,
"learning_rate": 1.7273393598827268e-06,
"loss": 0.5866,
"step": 98800
},
{
"epoch": 0.9664337714369473,
"grad_norm": 0.5441256165504456,
"learning_rate": 1.6784754458832153e-06,
"loss": 0.5889,
"step": 98900
},
{
"epoch": 0.9674109542189867,
"grad_norm": 0.6473023891448975,
"learning_rate": 1.629611531883704e-06,
"loss": 0.6066,
"step": 99000
},
{
"epoch": 0.968388137001026,
"grad_norm": 0.7462273836135864,
"learning_rate": 1.5807476178841925e-06,
"loss": 0.5926,
"step": 99100
},
{
"epoch": 0.9693653197830654,
"grad_norm": 0.4794386029243469,
"learning_rate": 1.5318837038846812e-06,
"loss": 0.5856,
"step": 99200
},
{
"epoch": 0.9703425025651048,
"grad_norm": 0.5676984190940857,
"learning_rate": 1.48301978988517e-06,
"loss": 0.5797,
"step": 99300
},
{
"epoch": 0.9713196853471442,
"grad_norm": 0.7232435941696167,
"learning_rate": 1.4341558758856586e-06,
"loss": 0.6122,
"step": 99400
},
{
"epoch": 0.9722968681291836,
"grad_norm": 0.6773326396942139,
"learning_rate": 1.385291961886147e-06,
"loss": 0.5877,
"step": 99500
},
{
"epoch": 0.973274050911223,
"grad_norm": 0.522219717502594,
"learning_rate": 1.3364280478866358e-06,
"loss": 0.5819,
"step": 99600
},
{
"epoch": 0.9742512336932623,
"grad_norm": 0.7057138681411743,
"learning_rate": 1.2875641338871245e-06,
"loss": 0.6047,
"step": 99700
},
{
"epoch": 0.9752284164753017,
"grad_norm": 0.8740668296813965,
"learning_rate": 1.2387002198876132e-06,
"loss": 0.5909,
"step": 99800
},
{
"epoch": 0.9762055992573411,
"grad_norm": 0.6199445128440857,
"learning_rate": 1.1898363058881017e-06,
"loss": 0.5972,
"step": 99900
},
{
"epoch": 0.9771827820393805,
"grad_norm": 0.8061028122901917,
"learning_rate": 1.1409723918885904e-06,
"loss": 0.5958,
"step": 100000
}
],
"logging_steps": 100,
"max_steps": 102335,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.254292317011968e+18,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}