|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9771827820393805, |
|
"eval_steps": 500, |
|
"global_step": 100000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009771827820393806, |
|
"grad_norm": 0.5417118072509766, |
|
"learning_rate": 4.995602247740044e-05, |
|
"loss": 1.378, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.001954365564078761, |
|
"grad_norm": 0.6493918895721436, |
|
"learning_rate": 4.990715856340093e-05, |
|
"loss": 1.3304, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0029315483461181415, |
|
"grad_norm": 0.9062462449073792, |
|
"learning_rate": 4.9858294649401425e-05, |
|
"loss": 1.3284, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.003908731128157522, |
|
"grad_norm": 0.750052273273468, |
|
"learning_rate": 4.9809430735401906e-05, |
|
"loss": 1.3166, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.004885913910196903, |
|
"grad_norm": 0.6602022051811218, |
|
"learning_rate": 4.97605668214024e-05, |
|
"loss": 1.3166, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.005863096692236283, |
|
"grad_norm": 0.4193927049636841, |
|
"learning_rate": 4.971170290740288e-05, |
|
"loss": 1.3098, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.006840279474275663, |
|
"grad_norm": 0.6095415949821472, |
|
"learning_rate": 4.966283899340338e-05, |
|
"loss": 1.3103, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.007817462256315045, |
|
"grad_norm": 0.9943467378616333, |
|
"learning_rate": 4.9613975079403865e-05, |
|
"loss": 1.3096, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.008794645038354424, |
|
"grad_norm": 1.2263585329055786, |
|
"learning_rate": 4.9565111165404346e-05, |
|
"loss": 1.3067, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.009771827820393805, |
|
"grad_norm": 0.7198677659034729, |
|
"learning_rate": 4.951624725140484e-05, |
|
"loss": 1.3041, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.010749010602433185, |
|
"grad_norm": 0.7370775938034058, |
|
"learning_rate": 4.946738333740533e-05, |
|
"loss": 1.302, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.011726193384472566, |
|
"grad_norm": 0.5109437704086304, |
|
"learning_rate": 4.941851942340582e-05, |
|
"loss": 1.3089, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.012703376166511945, |
|
"grad_norm": 0.1879555583000183, |
|
"learning_rate": 4.9369655509406305e-05, |
|
"loss": 1.3043, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.013680558948551327, |
|
"grad_norm": 0.951046884059906, |
|
"learning_rate": 4.932079159540679e-05, |
|
"loss": 1.3098, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.014657741730590706, |
|
"grad_norm": 0.2478829026222229, |
|
"learning_rate": 4.927192768140728e-05, |
|
"loss": 1.3026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01563492451263009, |
|
"grad_norm": 0.5585843324661255, |
|
"learning_rate": 4.9223063767407776e-05, |
|
"loss": 1.3014, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.016612107294669467, |
|
"grad_norm": 0.48532453179359436, |
|
"learning_rate": 4.917419985340826e-05, |
|
"loss": 1.2981, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.017589290076708848, |
|
"grad_norm": 0.4233573079109192, |
|
"learning_rate": 4.912533593940875e-05, |
|
"loss": 1.2992, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01856647285874823, |
|
"grad_norm": 0.3272475600242615, |
|
"learning_rate": 4.9076472025409234e-05, |
|
"loss": 1.292, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.01954365564078761, |
|
"grad_norm": 0.5299385786056519, |
|
"learning_rate": 4.902760811140973e-05, |
|
"loss": 1.2963, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02052083842282699, |
|
"grad_norm": 0.1614024043083191, |
|
"learning_rate": 4.8978744197410216e-05, |
|
"loss": 1.2945, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.02149802120486637, |
|
"grad_norm": 0.6039963960647583, |
|
"learning_rate": 4.8929880283410705e-05, |
|
"loss": 1.2913, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.02247520398690575, |
|
"grad_norm": 0.5772804021835327, |
|
"learning_rate": 4.888101636941119e-05, |
|
"loss": 1.2895, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.023452386768945132, |
|
"grad_norm": 0.7489622235298157, |
|
"learning_rate": 4.883215245541168e-05, |
|
"loss": 1.2847, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.024429569550984513, |
|
"grad_norm": 0.30208253860473633, |
|
"learning_rate": 4.878328854141217e-05, |
|
"loss": 1.2924, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02540675233302389, |
|
"grad_norm": 0.36944472789764404, |
|
"learning_rate": 4.873442462741266e-05, |
|
"loss": 1.2916, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.026383935115063272, |
|
"grad_norm": 0.3268676698207855, |
|
"learning_rate": 4.8685560713413145e-05, |
|
"loss": 1.2893, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.027361117897102653, |
|
"grad_norm": 0.2795974910259247, |
|
"learning_rate": 4.863669679941363e-05, |
|
"loss": 1.282, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.028338300679142035, |
|
"grad_norm": 0.36298853158950806, |
|
"learning_rate": 4.858783288541413e-05, |
|
"loss": 1.2832, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.029315483461181412, |
|
"grad_norm": 0.5242423415184021, |
|
"learning_rate": 4.853896897141461e-05, |
|
"loss": 1.2819, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.030292666243220794, |
|
"grad_norm": 0.25340864062309265, |
|
"learning_rate": 4.8490105057415104e-05, |
|
"loss": 1.2809, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.03126984902526018, |
|
"grad_norm": 0.7241976261138916, |
|
"learning_rate": 4.844124114341559e-05, |
|
"loss": 1.2802, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.032247031807299556, |
|
"grad_norm": 0.5154001712799072, |
|
"learning_rate": 4.839237722941608e-05, |
|
"loss": 1.2748, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.033224214589338934, |
|
"grad_norm": 0.5323473811149597, |
|
"learning_rate": 4.834351331541657e-05, |
|
"loss": 1.284, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.03420139737137832, |
|
"grad_norm": 0.3947168290615082, |
|
"learning_rate": 4.8294649401417056e-05, |
|
"loss": 1.276, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.035178580153417696, |
|
"grad_norm": 0.4776057302951813, |
|
"learning_rate": 4.8245785487417544e-05, |
|
"loss": 1.2783, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.036155762935457074, |
|
"grad_norm": 0.4884164035320282, |
|
"learning_rate": 4.819692157341804e-05, |
|
"loss": 1.2745, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.03713294571749646, |
|
"grad_norm": 0.5210428833961487, |
|
"learning_rate": 4.814805765941852e-05, |
|
"loss": 1.2707, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.038110128499535836, |
|
"grad_norm": 0.46214359998703003, |
|
"learning_rate": 4.809919374541901e-05, |
|
"loss": 1.2727, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.03908731128157522, |
|
"grad_norm": 0.2656782865524292, |
|
"learning_rate": 4.8050329831419496e-05, |
|
"loss": 1.2694, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0400644940636146, |
|
"grad_norm": 0.4923059940338135, |
|
"learning_rate": 4.8001465917419985e-05, |
|
"loss": 1.2665, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.04104167684565398, |
|
"grad_norm": 0.92928147315979, |
|
"learning_rate": 4.795260200342048e-05, |
|
"loss": 1.2627, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.04201885962769336, |
|
"grad_norm": 1.0651229619979858, |
|
"learning_rate": 4.790373808942096e-05, |
|
"loss": 1.2623, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.04299604240973274, |
|
"grad_norm": 0.9612557888031006, |
|
"learning_rate": 4.7854874175421456e-05, |
|
"loss": 1.2482, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.043973225191772124, |
|
"grad_norm": 1.0120874643325806, |
|
"learning_rate": 4.7806010261421944e-05, |
|
"loss": 1.2589, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0449504079738115, |
|
"grad_norm": 0.6250020861625671, |
|
"learning_rate": 4.775714634742243e-05, |
|
"loss": 1.2499, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.04592759075585088, |
|
"grad_norm": 0.2850038707256317, |
|
"learning_rate": 4.770828243342292e-05, |
|
"loss": 1.2446, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.046904773537890264, |
|
"grad_norm": 1.2032625675201416, |
|
"learning_rate": 4.765941851942341e-05, |
|
"loss": 1.2238, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.04788195631992964, |
|
"grad_norm": 0.42024949193000793, |
|
"learning_rate": 4.7610554605423896e-05, |
|
"loss": 1.2255, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.048859139101969026, |
|
"grad_norm": 0.7451406121253967, |
|
"learning_rate": 4.756169069142439e-05, |
|
"loss": 1.2071, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.049836321884008404, |
|
"grad_norm": 0.8735096454620361, |
|
"learning_rate": 4.751282677742487e-05, |
|
"loss": 1.2126, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.05081350466604778, |
|
"grad_norm": 0.73675137758255, |
|
"learning_rate": 4.746396286342537e-05, |
|
"loss": 1.2036, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.051790687448087167, |
|
"grad_norm": 0.6540606617927551, |
|
"learning_rate": 4.741509894942585e-05, |
|
"loss": 1.1825, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.052767870230126544, |
|
"grad_norm": 0.825066864490509, |
|
"learning_rate": 4.7366235035426336e-05, |
|
"loss": 1.1655, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.05374505301216593, |
|
"grad_norm": 1.6421219110488892, |
|
"learning_rate": 4.731737112142683e-05, |
|
"loss": 1.1716, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.05472223579420531, |
|
"grad_norm": 1.0644057989120483, |
|
"learning_rate": 4.726850720742731e-05, |
|
"loss": 1.1384, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.055699418576244684, |
|
"grad_norm": 1.1611616611480713, |
|
"learning_rate": 4.721964329342781e-05, |
|
"loss": 1.1499, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.05667660135828407, |
|
"grad_norm": 2.0900723934173584, |
|
"learning_rate": 4.7170779379428295e-05, |
|
"loss": 1.1323, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.05765378414032345, |
|
"grad_norm": 1.0580404996871948, |
|
"learning_rate": 4.712191546542878e-05, |
|
"loss": 1.112, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.058630966922362825, |
|
"grad_norm": 0.6299407482147217, |
|
"learning_rate": 4.707305155142927e-05, |
|
"loss": 1.104, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05960814970440221, |
|
"grad_norm": 0.6816271543502808, |
|
"learning_rate": 4.702418763742976e-05, |
|
"loss": 1.1128, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.06058533248644159, |
|
"grad_norm": 0.654796302318573, |
|
"learning_rate": 4.697532372343025e-05, |
|
"loss": 1.0942, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.06156251526848097, |
|
"grad_norm": 1.0433884859085083, |
|
"learning_rate": 4.692645980943074e-05, |
|
"loss": 1.0862, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.06253969805052036, |
|
"grad_norm": 0.6256537437438965, |
|
"learning_rate": 4.6877595895431224e-05, |
|
"loss": 1.081, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.06351688083255973, |
|
"grad_norm": 0.8173975348472595, |
|
"learning_rate": 4.682873198143172e-05, |
|
"loss": 1.0767, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.06449406361459911, |
|
"grad_norm": 0.7856473922729492, |
|
"learning_rate": 4.6779868067432206e-05, |
|
"loss": 1.0767, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.0654712463966385, |
|
"grad_norm": 0.6337741017341614, |
|
"learning_rate": 4.6731004153432695e-05, |
|
"loss": 1.0829, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.06644842917867787, |
|
"grad_norm": 0.5813809037208557, |
|
"learning_rate": 4.668214023943318e-05, |
|
"loss": 1.0571, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.06742561196071725, |
|
"grad_norm": 0.4155445992946625, |
|
"learning_rate": 4.6633276325433664e-05, |
|
"loss": 1.0707, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.06840279474275664, |
|
"grad_norm": 0.6730567812919617, |
|
"learning_rate": 4.658441241143416e-05, |
|
"loss": 1.0477, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.06937997752479601, |
|
"grad_norm": 0.8348300457000732, |
|
"learning_rate": 4.653554849743465e-05, |
|
"loss": 1.0644, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.07035716030683539, |
|
"grad_norm": 2.2414326667785645, |
|
"learning_rate": 4.6486684583435135e-05, |
|
"loss": 1.0577, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.07133434308887478, |
|
"grad_norm": 1.6573911905288696, |
|
"learning_rate": 4.643782066943562e-05, |
|
"loss": 1.0836, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.07231152587091415, |
|
"grad_norm": 0.5690039396286011, |
|
"learning_rate": 4.638895675543611e-05, |
|
"loss": 1.0541, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.07328870865295353, |
|
"grad_norm": 0.527215301990509, |
|
"learning_rate": 4.63400928414366e-05, |
|
"loss": 1.0164, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.07426589143499292, |
|
"grad_norm": 0.7997362613677979, |
|
"learning_rate": 4.6291228927437094e-05, |
|
"loss": 1.0447, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.0752430742170323, |
|
"grad_norm": 2.257143259048462, |
|
"learning_rate": 4.6242365013437575e-05, |
|
"loss": 1.0365, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.07622025699907167, |
|
"grad_norm": 0.9132490158081055, |
|
"learning_rate": 4.619350109943807e-05, |
|
"loss": 1.0498, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.07719743978111106, |
|
"grad_norm": 0.5229859948158264, |
|
"learning_rate": 4.614463718543856e-05, |
|
"loss": 1.0342, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.07817462256315044, |
|
"grad_norm": 0.6948792338371277, |
|
"learning_rate": 4.6095773271439046e-05, |
|
"loss": 1.0325, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07915180534518981, |
|
"grad_norm": 0.8526360988616943, |
|
"learning_rate": 4.6046909357439534e-05, |
|
"loss": 1.0183, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.0801289881272292, |
|
"grad_norm": 1.1457374095916748, |
|
"learning_rate": 4.599804544344002e-05, |
|
"loss": 1.0243, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.08110617090926858, |
|
"grad_norm": 0.9335997700691223, |
|
"learning_rate": 4.594918152944051e-05, |
|
"loss": 1.046, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.08208335369130795, |
|
"grad_norm": 0.8367229700088501, |
|
"learning_rate": 4.5900317615441e-05, |
|
"loss": 1.0176, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.08306053647334734, |
|
"grad_norm": 3.7648801803588867, |
|
"learning_rate": 4.5851453701441486e-05, |
|
"loss": 1.0047, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.08403771925538672, |
|
"grad_norm": 0.5877612829208374, |
|
"learning_rate": 4.5802589787441975e-05, |
|
"loss": 1.0346, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.08501490203742611, |
|
"grad_norm": 0.5145990252494812, |
|
"learning_rate": 4.575372587344246e-05, |
|
"loss": 1.0268, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.08599208481946548, |
|
"grad_norm": 0.9310688376426697, |
|
"learning_rate": 4.570486195944295e-05, |
|
"loss": 1.0109, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.08696926760150486, |
|
"grad_norm": 0.5182886719703674, |
|
"learning_rate": 4.5655998045443445e-05, |
|
"loss": 1.0117, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.08794645038354425, |
|
"grad_norm": 0.4319695234298706, |
|
"learning_rate": 4.560713413144393e-05, |
|
"loss": 1.0053, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.08892363316558362, |
|
"grad_norm": 4.307732582092285, |
|
"learning_rate": 4.555827021744442e-05, |
|
"loss": 1.0151, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.089900815947623, |
|
"grad_norm": 0.46516236662864685, |
|
"learning_rate": 4.550940630344491e-05, |
|
"loss": 0.9945, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.09087799872966239, |
|
"grad_norm": 1.2372952699661255, |
|
"learning_rate": 4.54605423894454e-05, |
|
"loss": 0.9865, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.09185518151170176, |
|
"grad_norm": 0.7494595646858215, |
|
"learning_rate": 4.5411678475445886e-05, |
|
"loss": 0.9824, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.09283236429374114, |
|
"grad_norm": 0.5540333390235901, |
|
"learning_rate": 4.5362814561446374e-05, |
|
"loss": 1.0132, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.09380954707578053, |
|
"grad_norm": 0.48533427715301514, |
|
"learning_rate": 4.531395064744686e-05, |
|
"loss": 1.0173, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.0947867298578199, |
|
"grad_norm": 0.4972572922706604, |
|
"learning_rate": 4.526508673344736e-05, |
|
"loss": 1.0078, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.09576391263985928, |
|
"grad_norm": 0.6748878955841064, |
|
"learning_rate": 4.521622281944784e-05, |
|
"loss": 1.0172, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.09674109542189867, |
|
"grad_norm": 0.5261876583099365, |
|
"learning_rate": 4.5167358905448326e-05, |
|
"loss": 1.0189, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.09771827820393805, |
|
"grad_norm": 0.4164600670337677, |
|
"learning_rate": 4.5118494991448814e-05, |
|
"loss": 0.9978, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.09869546098597742, |
|
"grad_norm": 0.40417763590812683, |
|
"learning_rate": 4.50696310774493e-05, |
|
"loss": 1.0103, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.09967264376801681, |
|
"grad_norm": 0.8591890931129456, |
|
"learning_rate": 4.50207671634498e-05, |
|
"loss": 1.0065, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.10064982655005619, |
|
"grad_norm": 0.5676371455192566, |
|
"learning_rate": 4.497190324945028e-05, |
|
"loss": 1.0089, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.10162700933209556, |
|
"grad_norm": 0.616646945476532, |
|
"learning_rate": 4.492303933545077e-05, |
|
"loss": 0.9897, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.10260419211413495, |
|
"grad_norm": 0.37536484003067017, |
|
"learning_rate": 4.487417542145126e-05, |
|
"loss": 0.9989, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.10358137489617433, |
|
"grad_norm": 0.6801789402961731, |
|
"learning_rate": 4.482531150745175e-05, |
|
"loss": 0.9923, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.1045585576782137, |
|
"grad_norm": 0.5848776698112488, |
|
"learning_rate": 4.477644759345224e-05, |
|
"loss": 0.9919, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.10553574046025309, |
|
"grad_norm": 0.7715157866477966, |
|
"learning_rate": 4.4727583679452725e-05, |
|
"loss": 0.9814, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.10651292324229247, |
|
"grad_norm": 0.8080986142158508, |
|
"learning_rate": 4.4678719765453214e-05, |
|
"loss": 0.9935, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.10749010602433186, |
|
"grad_norm": 0.4375016391277313, |
|
"learning_rate": 4.462985585145371e-05, |
|
"loss": 0.988, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.10846728880637123, |
|
"grad_norm": 0.8055805563926697, |
|
"learning_rate": 4.458099193745419e-05, |
|
"loss": 0.9861, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.10944447158841061, |
|
"grad_norm": 1.1914618015289307, |
|
"learning_rate": 4.4532128023454685e-05, |
|
"loss": 0.9622, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.11042165437045, |
|
"grad_norm": 0.4247540533542633, |
|
"learning_rate": 4.448326410945517e-05, |
|
"loss": 0.9602, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.11139883715248937, |
|
"grad_norm": 0.5454650521278381, |
|
"learning_rate": 4.4434400195455654e-05, |
|
"loss": 0.9696, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.11237601993452875, |
|
"grad_norm": 0.5259748697280884, |
|
"learning_rate": 4.438553628145615e-05, |
|
"loss": 1.0021, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.11335320271656814, |
|
"grad_norm": 0.5165246725082397, |
|
"learning_rate": 4.433667236745663e-05, |
|
"loss": 0.982, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.11433038549860751, |
|
"grad_norm": 0.6768147945404053, |
|
"learning_rate": 4.4287808453457125e-05, |
|
"loss": 0.9398, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.1153075682806469, |
|
"grad_norm": 1.0245041847229004, |
|
"learning_rate": 4.423894453945761e-05, |
|
"loss": 0.9934, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.11628475106268628, |
|
"grad_norm": 0.6241583228111267, |
|
"learning_rate": 4.41900806254581e-05, |
|
"loss": 0.9697, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.11726193384472565, |
|
"grad_norm": 0.4234873652458191, |
|
"learning_rate": 4.414121671145859e-05, |
|
"loss": 0.9723, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.11823911662676503, |
|
"grad_norm": 0.3932545781135559, |
|
"learning_rate": 4.409235279745908e-05, |
|
"loss": 0.9826, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.11921629940880442, |
|
"grad_norm": 1.5067880153656006, |
|
"learning_rate": 4.4043488883459565e-05, |
|
"loss": 0.9581, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.1201934821908438, |
|
"grad_norm": 0.41707366704940796, |
|
"learning_rate": 4.399462496946006e-05, |
|
"loss": 0.9666, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.12117066497288317, |
|
"grad_norm": 1.1278653144836426, |
|
"learning_rate": 4.394576105546054e-05, |
|
"loss": 0.9553, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.12214784775492256, |
|
"grad_norm": 0.350543737411499, |
|
"learning_rate": 4.3896897141461036e-05, |
|
"loss": 0.9422, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.12312503053696194, |
|
"grad_norm": 0.3775838315486908, |
|
"learning_rate": 4.3848033227461524e-05, |
|
"loss": 0.9626, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.12410221331900131, |
|
"grad_norm": 0.8341017365455627, |
|
"learning_rate": 4.379916931346201e-05, |
|
"loss": 0.9289, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.1250793961010407, |
|
"grad_norm": 0.805614173412323, |
|
"learning_rate": 4.37503053994625e-05, |
|
"loss": 0.9474, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.12605657888308008, |
|
"grad_norm": 0.8439397215843201, |
|
"learning_rate": 4.370144148546299e-05, |
|
"loss": 0.9661, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.12703376166511945, |
|
"grad_norm": 1.1272892951965332, |
|
"learning_rate": 4.3652577571463476e-05, |
|
"loss": 0.9514, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.12801094444715885, |
|
"grad_norm": 0.6426375508308411, |
|
"learning_rate": 4.3603713657463965e-05, |
|
"loss": 0.9448, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.12898812722919822, |
|
"grad_norm": 1.3205431699752808, |
|
"learning_rate": 4.355484974346445e-05, |
|
"loss": 0.9511, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.1299653100112376, |
|
"grad_norm": 0.3671954870223999, |
|
"learning_rate": 4.350598582946494e-05, |
|
"loss": 0.9506, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.130942492793277, |
|
"grad_norm": 0.7566332817077637, |
|
"learning_rate": 4.345712191546543e-05, |
|
"loss": 0.9363, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.13191967557531636, |
|
"grad_norm": 0.8800159692764282, |
|
"learning_rate": 4.340825800146592e-05, |
|
"loss": 0.9388, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.13289685835735573, |
|
"grad_norm": 0.7134628891944885, |
|
"learning_rate": 4.335939408746641e-05, |
|
"loss": 0.9162, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.13387404113939513, |
|
"grad_norm": 0.5555543899536133, |
|
"learning_rate": 4.331053017346689e-05, |
|
"loss": 0.9366, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.1348512239214345, |
|
"grad_norm": 0.4485512375831604, |
|
"learning_rate": 4.326166625946739e-05, |
|
"loss": 0.9286, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.13582840670347388, |
|
"grad_norm": 0.8888948559761047, |
|
"learning_rate": 4.3212802345467876e-05, |
|
"loss": 0.943, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.13680558948551327, |
|
"grad_norm": 0.6719749569892883, |
|
"learning_rate": 4.3163938431468364e-05, |
|
"loss": 0.9217, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.13778277226755264, |
|
"grad_norm": 0.695377767086029, |
|
"learning_rate": 4.311507451746885e-05, |
|
"loss": 0.9093, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.13875995504959202, |
|
"grad_norm": 0.5966312885284424, |
|
"learning_rate": 4.306621060346934e-05, |
|
"loss": 0.9195, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.13973713783163141, |
|
"grad_norm": 0.8073310256004333, |
|
"learning_rate": 4.301734668946983e-05, |
|
"loss": 0.9309, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.14071432061367078, |
|
"grad_norm": 0.6303800940513611, |
|
"learning_rate": 4.2968482775470316e-05, |
|
"loss": 0.9458, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.14169150339571016, |
|
"grad_norm": 0.7043970823287964, |
|
"learning_rate": 4.2919618861470804e-05, |
|
"loss": 0.9132, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.14266868617774955, |
|
"grad_norm": 0.9100736379623413, |
|
"learning_rate": 4.287075494747129e-05, |
|
"loss": 0.9296, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.14364586895978892, |
|
"grad_norm": 0.787862241268158, |
|
"learning_rate": 4.282189103347179e-05, |
|
"loss": 0.9643, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.1446230517418283, |
|
"grad_norm": 0.8169028162956238, |
|
"learning_rate": 4.277302711947227e-05, |
|
"loss": 0.9244, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.1456002345238677, |
|
"grad_norm": 0.9544184803962708, |
|
"learning_rate": 4.272416320547276e-05, |
|
"loss": 0.918, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.14657741730590707, |
|
"grad_norm": 0.5325574278831482, |
|
"learning_rate": 4.2675299291473245e-05, |
|
"loss": 0.9273, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.14755460008794646, |
|
"grad_norm": 1.1403323411941528, |
|
"learning_rate": 4.262643537747374e-05, |
|
"loss": 0.9095, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.14853178286998583, |
|
"grad_norm": 1.0411937236785889, |
|
"learning_rate": 4.257757146347423e-05, |
|
"loss": 0.8967, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.1495089656520252, |
|
"grad_norm": 0.630393922328949, |
|
"learning_rate": 4.2528707549474715e-05, |
|
"loss": 0.8883, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.1504861484340646, |
|
"grad_norm": 0.9445775747299194, |
|
"learning_rate": 4.2479843635475204e-05, |
|
"loss": 0.9253, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.15146333121610397, |
|
"grad_norm": 0.5689444541931152, |
|
"learning_rate": 4.243097972147569e-05, |
|
"loss": 0.8983, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.15244051399814335, |
|
"grad_norm": 0.7726677656173706, |
|
"learning_rate": 4.238211580747618e-05, |
|
"loss": 0.9228, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.15341769678018274, |
|
"grad_norm": 0.8260165452957153, |
|
"learning_rate": 4.2333251893476675e-05, |
|
"loss": 0.9202, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.15439487956222211, |
|
"grad_norm": 0.4869302809238434, |
|
"learning_rate": 4.2284387979477156e-05, |
|
"loss": 0.9283, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.15537206234426149, |
|
"grad_norm": 0.5768991708755493, |
|
"learning_rate": 4.2235524065477644e-05, |
|
"loss": 0.9233, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.15634924512630088, |
|
"grad_norm": 0.8856435418128967, |
|
"learning_rate": 4.218666015147814e-05, |
|
"loss": 0.8825, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.15732642790834026, |
|
"grad_norm": 0.5258185267448425, |
|
"learning_rate": 4.213779623747862e-05, |
|
"loss": 0.8834, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.15830361069037963, |
|
"grad_norm": 0.8340526223182678, |
|
"learning_rate": 4.2088932323479115e-05, |
|
"loss": 0.8856, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.15928079347241902, |
|
"grad_norm": 0.4123723804950714, |
|
"learning_rate": 4.2040068409479596e-05, |
|
"loss": 0.8957, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.1602579762544584, |
|
"grad_norm": 0.8336274027824402, |
|
"learning_rate": 4.199120449548009e-05, |
|
"loss": 0.9053, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.16123515903649777, |
|
"grad_norm": 0.7977516055107117, |
|
"learning_rate": 4.194234058148058e-05, |
|
"loss": 0.8698, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.16221234181853716, |
|
"grad_norm": 0.5064985156059265, |
|
"learning_rate": 4.189347666748107e-05, |
|
"loss": 0.8945, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.16318952460057654, |
|
"grad_norm": 0.8241267204284668, |
|
"learning_rate": 4.1844612753481555e-05, |
|
"loss": 0.8875, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.1641667073826159, |
|
"grad_norm": 0.7517113089561462, |
|
"learning_rate": 4.179574883948204e-05, |
|
"loss": 0.8845, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.1651438901646553, |
|
"grad_norm": 0.6297169923782349, |
|
"learning_rate": 4.174688492548253e-05, |
|
"loss": 0.9303, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.16612107294669468, |
|
"grad_norm": 0.5828490257263184, |
|
"learning_rate": 4.1698021011483026e-05, |
|
"loss": 0.8654, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.16709825572873405, |
|
"grad_norm": 0.3038561940193176, |
|
"learning_rate": 4.164915709748351e-05, |
|
"loss": 0.8933, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.16807543851077344, |
|
"grad_norm": 0.8928827047348022, |
|
"learning_rate": 4.1600293183484e-05, |
|
"loss": 0.8509, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.16905262129281282, |
|
"grad_norm": 0.7055086493492126, |
|
"learning_rate": 4.155142926948449e-05, |
|
"loss": 0.8814, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.17002980407485221, |
|
"grad_norm": 0.5377823710441589, |
|
"learning_rate": 4.150256535548497e-05, |
|
"loss": 0.888, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.17100698685689159, |
|
"grad_norm": 0.6319778561592102, |
|
"learning_rate": 4.1453701441485466e-05, |
|
"loss": 0.8575, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.17198416963893096, |
|
"grad_norm": 0.8756042122840881, |
|
"learning_rate": 4.1404837527485954e-05, |
|
"loss": 0.8805, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.17296135242097035, |
|
"grad_norm": 0.5293178558349609, |
|
"learning_rate": 4.135597361348644e-05, |
|
"loss": 0.8471, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.17393853520300973, |
|
"grad_norm": 0.9118284583091736, |
|
"learning_rate": 4.130710969948693e-05, |
|
"loss": 0.8426, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.1749157179850491, |
|
"grad_norm": 1.0211195945739746, |
|
"learning_rate": 4.125824578548742e-05, |
|
"loss": 0.8877, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.1758929007670885, |
|
"grad_norm": 1.4174985885620117, |
|
"learning_rate": 4.120938187148791e-05, |
|
"loss": 0.8731, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.17687008354912787, |
|
"grad_norm": 0.8243415951728821, |
|
"learning_rate": 4.1160517957488395e-05, |
|
"loss": 0.8852, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.17784726633116724, |
|
"grad_norm": 0.8385602235794067, |
|
"learning_rate": 4.111165404348888e-05, |
|
"loss": 0.8361, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.17882444911320663, |
|
"grad_norm": 1.003968358039856, |
|
"learning_rate": 4.106279012948938e-05, |
|
"loss": 0.8738, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.179801631895246, |
|
"grad_norm": 0.7428449988365173, |
|
"learning_rate": 4.101392621548986e-05, |
|
"loss": 0.8563, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.18077881467728538, |
|
"grad_norm": 1.8963735103607178, |
|
"learning_rate": 4.0965062301490354e-05, |
|
"loss": 0.8428, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.18175599745932478, |
|
"grad_norm": 0.6868895888328552, |
|
"learning_rate": 4.091619838749084e-05, |
|
"loss": 0.8727, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.18273318024136415, |
|
"grad_norm": 1.8936256170272827, |
|
"learning_rate": 4.086733447349133e-05, |
|
"loss": 0.9211, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.18371036302340352, |
|
"grad_norm": 1.004941463470459, |
|
"learning_rate": 4.081847055949182e-05, |
|
"loss": 0.8404, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.18468754580544292, |
|
"grad_norm": 1.4084818363189697, |
|
"learning_rate": 4.0769606645492306e-05, |
|
"loss": 0.868, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.1856647285874823, |
|
"grad_norm": 0.6459541320800781, |
|
"learning_rate": 4.0720742731492794e-05, |
|
"loss": 0.8583, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.18664191136952166, |
|
"grad_norm": 0.7335548996925354, |
|
"learning_rate": 4.067187881749328e-05, |
|
"loss": 0.8622, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.18761909415156106, |
|
"grad_norm": 0.6783348321914673, |
|
"learning_rate": 4.062301490349377e-05, |
|
"loss": 0.8572, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.18859627693360043, |
|
"grad_norm": 0.6323419809341431, |
|
"learning_rate": 4.057415098949426e-05, |
|
"loss": 0.8763, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 0.963927686214447, |
|
"learning_rate": 4.052528707549475e-05, |
|
"loss": 0.8543, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.1905506424976792, |
|
"grad_norm": 0.4785550832748413, |
|
"learning_rate": 4.0476423161495234e-05, |
|
"loss": 0.863, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.19152782527971857, |
|
"grad_norm": 0.6358627080917358, |
|
"learning_rate": 4.042755924749573e-05, |
|
"loss": 0.8842, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.19250500806175797, |
|
"grad_norm": 0.7857956886291504, |
|
"learning_rate": 4.037869533349621e-05, |
|
"loss": 0.8698, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.19348219084379734, |
|
"grad_norm": 0.5225537419319153, |
|
"learning_rate": 4.0329831419496705e-05, |
|
"loss": 0.8842, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.1944593736258367, |
|
"grad_norm": 0.582313597202301, |
|
"learning_rate": 4.0280967505497194e-05, |
|
"loss": 0.8506, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.1954365564078761, |
|
"grad_norm": 0.7206740379333496, |
|
"learning_rate": 4.023210359149768e-05, |
|
"loss": 0.8529, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.19641373918991548, |
|
"grad_norm": 0.45054760575294495, |
|
"learning_rate": 4.018323967749817e-05, |
|
"loss": 0.8564, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.19739092197195485, |
|
"grad_norm": 0.9214595556259155, |
|
"learning_rate": 4.013437576349866e-05, |
|
"loss": 0.8443, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.19836810475399425, |
|
"grad_norm": 0.9843263626098633, |
|
"learning_rate": 4.0085511849499146e-05, |
|
"loss": 0.856, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.19934528753603362, |
|
"grad_norm": 0.6508098840713501, |
|
"learning_rate": 4.0036647935499634e-05, |
|
"loss": 0.8532, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.200322470318073, |
|
"grad_norm": 0.8091655969619751, |
|
"learning_rate": 3.998778402150012e-05, |
|
"loss": 0.8691, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.20129965310011239, |
|
"grad_norm": 0.8139657378196716, |
|
"learning_rate": 3.993892010750061e-05, |
|
"loss": 0.8608, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.20227683588215176, |
|
"grad_norm": 0.628423273563385, |
|
"learning_rate": 3.9890056193501105e-05, |
|
"loss": 0.8369, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.20325401866419113, |
|
"grad_norm": 1.737331748008728, |
|
"learning_rate": 3.9841192279501586e-05, |
|
"loss": 0.8363, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.20423120144623053, |
|
"grad_norm": 1.036280870437622, |
|
"learning_rate": 3.979232836550208e-05, |
|
"loss": 0.8387, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.2052083842282699, |
|
"grad_norm": 0.35834863781929016, |
|
"learning_rate": 3.974346445150256e-05, |
|
"loss": 0.8565, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.20618556701030927, |
|
"grad_norm": 0.7657331824302673, |
|
"learning_rate": 3.969460053750306e-05, |
|
"loss": 0.8654, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.20716274979234867, |
|
"grad_norm": 1.077300786972046, |
|
"learning_rate": 3.9645736623503545e-05, |
|
"loss": 0.8218, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.20813993257438804, |
|
"grad_norm": 0.5806353688240051, |
|
"learning_rate": 3.959687270950403e-05, |
|
"loss": 0.8375, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.2091171153564274, |
|
"grad_norm": 0.3875705599784851, |
|
"learning_rate": 3.954800879550452e-05, |
|
"loss": 0.8342, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.2100942981384668, |
|
"grad_norm": 0.7829961180686951, |
|
"learning_rate": 3.949914488150501e-05, |
|
"loss": 0.832, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.21107148092050618, |
|
"grad_norm": 1.9466382265090942, |
|
"learning_rate": 3.94502809675055e-05, |
|
"loss": 0.8118, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.21204866370254555, |
|
"grad_norm": 0.6271357536315918, |
|
"learning_rate": 3.940141705350599e-05, |
|
"loss": 0.8436, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.21302584648458495, |
|
"grad_norm": 1.320719838142395, |
|
"learning_rate": 3.9352553139506474e-05, |
|
"loss": 0.8586, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.21400302926662432, |
|
"grad_norm": 0.6017069220542908, |
|
"learning_rate": 3.930368922550697e-05, |
|
"loss": 0.8242, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.21498021204866372, |
|
"grad_norm": 0.8584203124046326, |
|
"learning_rate": 3.9254825311507456e-05, |
|
"loss": 0.815, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.2159573948307031, |
|
"grad_norm": 0.623652458190918, |
|
"learning_rate": 3.920596139750794e-05, |
|
"loss": 0.812, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.21693457761274246, |
|
"grad_norm": 0.6867117881774902, |
|
"learning_rate": 3.915709748350843e-05, |
|
"loss": 0.8141, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.21791176039478186, |
|
"grad_norm": 0.6963294744491577, |
|
"learning_rate": 3.910823356950892e-05, |
|
"loss": 0.8227, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.21888894317682123, |
|
"grad_norm": 0.6727440357208252, |
|
"learning_rate": 3.905936965550941e-05, |
|
"loss": 0.8285, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.2198661259588606, |
|
"grad_norm": 1.261771559715271, |
|
"learning_rate": 3.90105057415099e-05, |
|
"loss": 0.8396, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.2208433087409, |
|
"grad_norm": 0.9146804809570312, |
|
"learning_rate": 3.8961641827510385e-05, |
|
"loss": 0.8194, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.22182049152293937, |
|
"grad_norm": 0.9350225329399109, |
|
"learning_rate": 3.891277791351087e-05, |
|
"loss": 0.8376, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.22279767430497874, |
|
"grad_norm": 0.6317518353462219, |
|
"learning_rate": 3.886391399951137e-05, |
|
"loss": 0.8313, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.22377485708701814, |
|
"grad_norm": 0.6716780662536621, |
|
"learning_rate": 3.881505008551185e-05, |
|
"loss": 0.8033, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.2247520398690575, |
|
"grad_norm": 0.4494755268096924, |
|
"learning_rate": 3.8766186171512344e-05, |
|
"loss": 0.8047, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.22572922265109688, |
|
"grad_norm": 0.5505642890930176, |
|
"learning_rate": 3.8717322257512825e-05, |
|
"loss": 0.8456, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.22670640543313628, |
|
"grad_norm": 0.8866478800773621, |
|
"learning_rate": 3.866845834351332e-05, |
|
"loss": 0.8105, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.22768358821517565, |
|
"grad_norm": 0.7525384426116943, |
|
"learning_rate": 3.861959442951381e-05, |
|
"loss": 0.8292, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.22866077099721502, |
|
"grad_norm": 0.8182941675186157, |
|
"learning_rate": 3.8570730515514296e-05, |
|
"loss": 0.8392, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.22963795377925442, |
|
"grad_norm": 0.6246720552444458, |
|
"learning_rate": 3.8521866601514784e-05, |
|
"loss": 0.8292, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.2306151365612938, |
|
"grad_norm": 0.7931325435638428, |
|
"learning_rate": 3.847300268751527e-05, |
|
"loss": 0.83, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.23159231934333316, |
|
"grad_norm": 0.4839908480644226, |
|
"learning_rate": 3.842413877351576e-05, |
|
"loss": 0.8544, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.23256950212537256, |
|
"grad_norm": 0.694095253944397, |
|
"learning_rate": 3.837527485951625e-05, |
|
"loss": 0.8168, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.23354668490741193, |
|
"grad_norm": 0.6341009140014648, |
|
"learning_rate": 3.8326410945516736e-05, |
|
"loss": 0.8007, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.2345238676894513, |
|
"grad_norm": 0.6198739409446716, |
|
"learning_rate": 3.8277547031517224e-05, |
|
"loss": 0.8222, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.2355010504714907, |
|
"grad_norm": 0.7246755361557007, |
|
"learning_rate": 3.822868311751772e-05, |
|
"loss": 0.8239, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.23647823325353007, |
|
"grad_norm": 1.1782780885696411, |
|
"learning_rate": 3.81798192035182e-05, |
|
"loss": 0.8069, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.23745541603556947, |
|
"grad_norm": 0.7902185320854187, |
|
"learning_rate": 3.8130955289518695e-05, |
|
"loss": 0.8283, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.23843259881760884, |
|
"grad_norm": 1.605393648147583, |
|
"learning_rate": 3.808209137551918e-05, |
|
"loss": 0.7758, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.2394097815996482, |
|
"grad_norm": 0.5076558589935303, |
|
"learning_rate": 3.803322746151967e-05, |
|
"loss": 0.8178, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.2403869643816876, |
|
"grad_norm": 0.777646005153656, |
|
"learning_rate": 3.798436354752016e-05, |
|
"loss": 0.8074, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.24136414716372698, |
|
"grad_norm": 1.3850637674331665, |
|
"learning_rate": 3.793549963352065e-05, |
|
"loss": 0.8058, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.24234132994576635, |
|
"grad_norm": 0.6476046442985535, |
|
"learning_rate": 3.7886635719521136e-05, |
|
"loss": 0.7967, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.24331851272780575, |
|
"grad_norm": 0.5768633484840393, |
|
"learning_rate": 3.7837771805521624e-05, |
|
"loss": 0.8269, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.24429569550984512, |
|
"grad_norm": 0.7800481915473938, |
|
"learning_rate": 3.778890789152211e-05, |
|
"loss": 0.8237, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.2452728782918845, |
|
"grad_norm": 0.591273844242096, |
|
"learning_rate": 3.77400439775226e-05, |
|
"loss": 0.8045, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.2462500610739239, |
|
"grad_norm": 0.5170730352401733, |
|
"learning_rate": 3.769118006352309e-05, |
|
"loss": 0.818, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.24722724385596326, |
|
"grad_norm": 0.7280113101005554, |
|
"learning_rate": 3.7642316149523576e-05, |
|
"loss": 0.806, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.24820442663800263, |
|
"grad_norm": 0.48092082142829895, |
|
"learning_rate": 3.759345223552407e-05, |
|
"loss": 0.804, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.24918160942004203, |
|
"grad_norm": 0.8031238913536072, |
|
"learning_rate": 3.754458832152455e-05, |
|
"loss": 0.8031, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.2501587922020814, |
|
"grad_norm": 0.5290892720222473, |
|
"learning_rate": 3.749572440752505e-05, |
|
"loss": 0.816, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.25113597498412077, |
|
"grad_norm": 1.850685477256775, |
|
"learning_rate": 3.7446860493525535e-05, |
|
"loss": 0.8241, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.25211315776616017, |
|
"grad_norm": 0.9196923971176147, |
|
"learning_rate": 3.739799657952602e-05, |
|
"loss": 0.8115, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.25309034054819957, |
|
"grad_norm": 0.8779144883155823, |
|
"learning_rate": 3.734913266552651e-05, |
|
"loss": 0.8065, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.2540675233302389, |
|
"grad_norm": 0.6696827411651611, |
|
"learning_rate": 3.7300268751527e-05, |
|
"loss": 0.7827, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.2550447061122783, |
|
"grad_norm": 0.5037100315093994, |
|
"learning_rate": 3.725140483752749e-05, |
|
"loss": 0.7955, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.2560218888943177, |
|
"grad_norm": 1.4716683626174927, |
|
"learning_rate": 3.7202540923527975e-05, |
|
"loss": 0.8076, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.25699907167635705, |
|
"grad_norm": 0.7515909671783447, |
|
"learning_rate": 3.7153677009528463e-05, |
|
"loss": 0.7645, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.25797625445839645, |
|
"grad_norm": 0.8641912341117859, |
|
"learning_rate": 3.710481309552896e-05, |
|
"loss": 0.7794, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.25895343724043585, |
|
"grad_norm": 0.7385029792785645, |
|
"learning_rate": 3.705594918152944e-05, |
|
"loss": 0.8047, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.2599306200224752, |
|
"grad_norm": 1.194313645362854, |
|
"learning_rate": 3.700708526752993e-05, |
|
"loss": 0.7973, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.2609078028045146, |
|
"grad_norm": 0.8573377728462219, |
|
"learning_rate": 3.695822135353042e-05, |
|
"loss": 0.8054, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.261884985586554, |
|
"grad_norm": 0.7428358793258667, |
|
"learning_rate": 3.6909357439530904e-05, |
|
"loss": 0.8194, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.26286216836859333, |
|
"grad_norm": 1.1976490020751953, |
|
"learning_rate": 3.68604935255314e-05, |
|
"loss": 0.7745, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.26383935115063273, |
|
"grad_norm": 0.8391226530075073, |
|
"learning_rate": 3.681162961153189e-05, |
|
"loss": 0.7981, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.2648165339326721, |
|
"grad_norm": 1.0753370523452759, |
|
"learning_rate": 3.6762765697532375e-05, |
|
"loss": 0.8018, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.26579371671471147, |
|
"grad_norm": 0.8495202660560608, |
|
"learning_rate": 3.671390178353286e-05, |
|
"loss": 0.7894, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.26677089949675087, |
|
"grad_norm": 2.3333170413970947, |
|
"learning_rate": 3.666503786953335e-05, |
|
"loss": 0.7892, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.26774808227879027, |
|
"grad_norm": 0.7213625311851501, |
|
"learning_rate": 3.661617395553384e-05, |
|
"loss": 0.7902, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.2687252650608296, |
|
"grad_norm": 1.045614242553711, |
|
"learning_rate": 3.6567310041534334e-05, |
|
"loss": 0.7719, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.269702447842869, |
|
"grad_norm": 0.42100274562835693, |
|
"learning_rate": 3.6518446127534815e-05, |
|
"loss": 0.7705, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.2706796306249084, |
|
"grad_norm": 0.5944122076034546, |
|
"learning_rate": 3.646958221353531e-05, |
|
"loss": 0.7717, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.27165681340694775, |
|
"grad_norm": 0.7398585677146912, |
|
"learning_rate": 3.642071829953579e-05, |
|
"loss": 0.7896, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.27263399618898715, |
|
"grad_norm": 0.8064782023429871, |
|
"learning_rate": 3.6371854385536286e-05, |
|
"loss": 0.7917, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.27361117897102655, |
|
"grad_norm": 0.6715266108512878, |
|
"learning_rate": 3.6322990471536774e-05, |
|
"loss": 0.7771, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.2745883617530659, |
|
"grad_norm": 1.1130329370498657, |
|
"learning_rate": 3.6274126557537255e-05, |
|
"loss": 0.7476, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.2755655445351053, |
|
"grad_norm": 0.7601907253265381, |
|
"learning_rate": 3.622526264353775e-05, |
|
"loss": 0.7745, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.2765427273171447, |
|
"grad_norm": 0.8511783480644226, |
|
"learning_rate": 3.617639872953824e-05, |
|
"loss": 0.7737, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.27751991009918403, |
|
"grad_norm": 0.8136917948722839, |
|
"learning_rate": 3.6127534815538726e-05, |
|
"loss": 0.7905, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.27849709288122343, |
|
"grad_norm": 0.5580685138702393, |
|
"learning_rate": 3.6078670901539214e-05, |
|
"loss": 0.7957, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.27947427566326283, |
|
"grad_norm": 0.750845730304718, |
|
"learning_rate": 3.60298069875397e-05, |
|
"loss": 0.7396, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.28045145844530217, |
|
"grad_norm": 0.9611383080482483, |
|
"learning_rate": 3.598094307354019e-05, |
|
"loss": 0.774, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.28142864122734157, |
|
"grad_norm": 0.6622794270515442, |
|
"learning_rate": 3.5932079159540685e-05, |
|
"loss": 0.7993, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.28240582400938097, |
|
"grad_norm": 0.4816977381706238, |
|
"learning_rate": 3.588321524554117e-05, |
|
"loss": 0.7868, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.2833830067914203, |
|
"grad_norm": 0.6779691576957703, |
|
"learning_rate": 3.583435133154166e-05, |
|
"loss": 0.7838, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.2843601895734597, |
|
"grad_norm": 0.9714117646217346, |
|
"learning_rate": 3.578548741754214e-05, |
|
"loss": 0.7686, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.2853373723554991, |
|
"grad_norm": 0.7163410186767578, |
|
"learning_rate": 3.573662350354264e-05, |
|
"loss": 0.7747, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.28631455513753845, |
|
"grad_norm": 0.7338354587554932, |
|
"learning_rate": 3.5687759589543126e-05, |
|
"loss": 0.7703, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.28729173791957785, |
|
"grad_norm": 0.765074610710144, |
|
"learning_rate": 3.5638895675543614e-05, |
|
"loss": 0.7811, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.28826892070161725, |
|
"grad_norm": 0.6714346408843994, |
|
"learning_rate": 3.55900317615441e-05, |
|
"loss": 0.7971, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.2892461034836566, |
|
"grad_norm": 0.6784923672676086, |
|
"learning_rate": 3.554116784754459e-05, |
|
"loss": 0.7704, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.290223286265696, |
|
"grad_norm": 0.6446245312690735, |
|
"learning_rate": 3.549230393354508e-05, |
|
"loss": 0.7843, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.2912004690477354, |
|
"grad_norm": 0.9739934206008911, |
|
"learning_rate": 3.5443440019545566e-05, |
|
"loss": 0.7423, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.2921776518297748, |
|
"grad_norm": 0.2898177206516266, |
|
"learning_rate": 3.5394576105546054e-05, |
|
"loss": 0.7322, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.29315483461181413, |
|
"grad_norm": 0.720974862575531, |
|
"learning_rate": 3.534571219154654e-05, |
|
"loss": 0.7593, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.29413201739385353, |
|
"grad_norm": 0.4672446548938751, |
|
"learning_rate": 3.529684827754704e-05, |
|
"loss": 0.7422, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.2951092001758929, |
|
"grad_norm": 0.7546716332435608, |
|
"learning_rate": 3.524798436354752e-05, |
|
"loss": 0.7788, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.29608638295793227, |
|
"grad_norm": 0.6265705823898315, |
|
"learning_rate": 3.519912044954801e-05, |
|
"loss": 0.745, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.29706356573997167, |
|
"grad_norm": 1.092965841293335, |
|
"learning_rate": 3.51502565355485e-05, |
|
"loss": 0.789, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.29804074852201107, |
|
"grad_norm": 0.7648272514343262, |
|
"learning_rate": 3.510139262154899e-05, |
|
"loss": 0.758, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.2990179313040504, |
|
"grad_norm": 0.785746157169342, |
|
"learning_rate": 3.505252870754948e-05, |
|
"loss": 0.7744, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.2999951140860898, |
|
"grad_norm": 0.8007264733314514, |
|
"learning_rate": 3.5003664793549965e-05, |
|
"loss": 0.7696, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.3009722968681292, |
|
"grad_norm": 1.1369248628616333, |
|
"learning_rate": 3.4954800879550453e-05, |
|
"loss": 0.7667, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.30194947965016855, |
|
"grad_norm": 0.6251523494720459, |
|
"learning_rate": 3.490593696555095e-05, |
|
"loss": 0.7686, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.30292666243220795, |
|
"grad_norm": 1.1552335023880005, |
|
"learning_rate": 3.485707305155143e-05, |
|
"loss": 0.7693, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.30390384521424735, |
|
"grad_norm": 0.9136368036270142, |
|
"learning_rate": 3.480820913755192e-05, |
|
"loss": 0.7898, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.3048810279962867, |
|
"grad_norm": 0.4203650951385498, |
|
"learning_rate": 3.4759345223552406e-05, |
|
"loss": 0.7541, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.3058582107783261, |
|
"grad_norm": 0.671546995639801, |
|
"learning_rate": 3.4710481309552894e-05, |
|
"loss": 0.735, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.3068353935603655, |
|
"grad_norm": 0.6711509227752686, |
|
"learning_rate": 3.466161739555339e-05, |
|
"loss": 0.7481, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.30781257634240483, |
|
"grad_norm": 0.7787076234817505, |
|
"learning_rate": 3.461275348155387e-05, |
|
"loss": 0.7701, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.30878975912444423, |
|
"grad_norm": 0.5270808935165405, |
|
"learning_rate": 3.4563889567554365e-05, |
|
"loss": 0.7166, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.30976694190648363, |
|
"grad_norm": 0.7732633352279663, |
|
"learning_rate": 3.451502565355485e-05, |
|
"loss": 0.7857, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.31074412468852297, |
|
"grad_norm": 0.6347182989120483, |
|
"learning_rate": 3.446616173955534e-05, |
|
"loss": 0.7384, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.31172130747056237, |
|
"grad_norm": 0.9557164311408997, |
|
"learning_rate": 3.441729782555583e-05, |
|
"loss": 0.755, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.31269849025260177, |
|
"grad_norm": 0.8120887279510498, |
|
"learning_rate": 3.436843391155632e-05, |
|
"loss": 0.7356, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.3136756730346411, |
|
"grad_norm": 0.6804450750350952, |
|
"learning_rate": 3.4319569997556805e-05, |
|
"loss": 0.785, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.3146528558166805, |
|
"grad_norm": 0.7511081695556641, |
|
"learning_rate": 3.42707060835573e-05, |
|
"loss": 0.7427, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.3156300385987199, |
|
"grad_norm": 0.8396822214126587, |
|
"learning_rate": 3.422184216955778e-05, |
|
"loss": 0.7801, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.31660722138075925, |
|
"grad_norm": 1.0063520669937134, |
|
"learning_rate": 3.4172978255558276e-05, |
|
"loss": 0.7638, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.31758440416279865, |
|
"grad_norm": 1.349414587020874, |
|
"learning_rate": 3.412411434155876e-05, |
|
"loss": 0.7522, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.31856158694483805, |
|
"grad_norm": 0.8259103298187256, |
|
"learning_rate": 3.4075250427559245e-05, |
|
"loss": 0.7351, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.3195387697268774, |
|
"grad_norm": 0.4894813597202301, |
|
"learning_rate": 3.402638651355974e-05, |
|
"loss": 0.7593, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.3205159525089168, |
|
"grad_norm": 0.6558930277824402, |
|
"learning_rate": 3.397752259956022e-05, |
|
"loss": 0.7496, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.3214931352909562, |
|
"grad_norm": 1.2009482383728027, |
|
"learning_rate": 3.3928658685560716e-05, |
|
"loss": 0.7379, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.32247031807299553, |
|
"grad_norm": 0.8621765375137329, |
|
"learning_rate": 3.3879794771561204e-05, |
|
"loss": 0.7381, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.32344750085503493, |
|
"grad_norm": 0.5097255706787109, |
|
"learning_rate": 3.383093085756169e-05, |
|
"loss": 0.7567, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.32442468363707433, |
|
"grad_norm": 0.48458051681518555, |
|
"learning_rate": 3.378206694356218e-05, |
|
"loss": 0.7649, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.3254018664191137, |
|
"grad_norm": 0.7467001676559448, |
|
"learning_rate": 3.373320302956267e-05, |
|
"loss": 0.7612, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.32637904920115307, |
|
"grad_norm": 1.1591566801071167, |
|
"learning_rate": 3.368433911556316e-05, |
|
"loss": 0.7394, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.32735623198319247, |
|
"grad_norm": 0.9665714502334595, |
|
"learning_rate": 3.363547520156365e-05, |
|
"loss": 0.7472, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.3283334147652318, |
|
"grad_norm": 0.5714060664176941, |
|
"learning_rate": 3.358661128756413e-05, |
|
"loss": 0.7385, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.3293105975472712, |
|
"grad_norm": 0.8278976082801819, |
|
"learning_rate": 3.353774737356463e-05, |
|
"loss": 0.724, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.3302877803293106, |
|
"grad_norm": 0.9210988283157349, |
|
"learning_rate": 3.3488883459565116e-05, |
|
"loss": 0.7542, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.33126496311134995, |
|
"grad_norm": 1.0610690116882324, |
|
"learning_rate": 3.3440019545565604e-05, |
|
"loss": 0.7284, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.33224214589338935, |
|
"grad_norm": 0.6521257162094116, |
|
"learning_rate": 3.339115563156609e-05, |
|
"loss": 0.755, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.33321932867542875, |
|
"grad_norm": 1.0515367984771729, |
|
"learning_rate": 3.334229171756657e-05, |
|
"loss": 0.7423, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.3341965114574681, |
|
"grad_norm": 0.8415219783782959, |
|
"learning_rate": 3.329342780356707e-05, |
|
"loss": 0.716, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.3351736942395075, |
|
"grad_norm": 0.5018264651298523, |
|
"learning_rate": 3.3244563889567556e-05, |
|
"loss": 0.7556, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.3361508770215469, |
|
"grad_norm": 0.6532925963401794, |
|
"learning_rate": 3.3195699975568044e-05, |
|
"loss": 0.7335, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.3371280598035863, |
|
"grad_norm": 0.6794486045837402, |
|
"learning_rate": 3.314683606156853e-05, |
|
"loss": 0.7466, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.33810524258562563, |
|
"grad_norm": 0.7372865080833435, |
|
"learning_rate": 3.309797214756902e-05, |
|
"loss": 0.727, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.33908242536766503, |
|
"grad_norm": 0.6354756355285645, |
|
"learning_rate": 3.304910823356951e-05, |
|
"loss": 0.725, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.34005960814970443, |
|
"grad_norm": 0.7180996537208557, |
|
"learning_rate": 3.300024431957e-05, |
|
"loss": 0.7049, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.34103679093174377, |
|
"grad_norm": 1.3991978168487549, |
|
"learning_rate": 3.2951380405570484e-05, |
|
"loss": 0.7251, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.34201397371378317, |
|
"grad_norm": 0.5680633783340454, |
|
"learning_rate": 3.290251649157098e-05, |
|
"loss": 0.744, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.34299115649582257, |
|
"grad_norm": 0.5309197306632996, |
|
"learning_rate": 3.285365257757147e-05, |
|
"loss": 0.7277, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.3439683392778619, |
|
"grad_norm": 1.449625849723816, |
|
"learning_rate": 3.2804788663571955e-05, |
|
"loss": 0.7127, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.3449455220599013, |
|
"grad_norm": 0.6244996190071106, |
|
"learning_rate": 3.2755924749572443e-05, |
|
"loss": 0.6992, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.3459227048419407, |
|
"grad_norm": 1.037988305091858, |
|
"learning_rate": 3.270706083557293e-05, |
|
"loss": 0.7095, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.34689988762398005, |
|
"grad_norm": 1.2503726482391357, |
|
"learning_rate": 3.265819692157342e-05, |
|
"loss": 0.7264, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.34787707040601945, |
|
"grad_norm": 1.2136774063110352, |
|
"learning_rate": 3.260933300757391e-05, |
|
"loss": 0.7418, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.34885425318805885, |
|
"grad_norm": 0.9328750371932983, |
|
"learning_rate": 3.2560469093574396e-05, |
|
"loss": 0.7509, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.3498314359700982, |
|
"grad_norm": 0.5122935771942139, |
|
"learning_rate": 3.2511605179574884e-05, |
|
"loss": 0.7114, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.3508086187521376, |
|
"grad_norm": 1.153583288192749, |
|
"learning_rate": 3.246274126557537e-05, |
|
"loss": 0.7316, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.351785801534177, |
|
"grad_norm": 0.7405250668525696, |
|
"learning_rate": 3.241387735157586e-05, |
|
"loss": 0.7404, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.35276298431621633, |
|
"grad_norm": 0.607565701007843, |
|
"learning_rate": 3.2365013437576355e-05, |
|
"loss": 0.7196, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.35374016709825573, |
|
"grad_norm": 1.4975577592849731, |
|
"learning_rate": 3.2316149523576836e-05, |
|
"loss": 0.703, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.35471734988029513, |
|
"grad_norm": 0.9088447093963623, |
|
"learning_rate": 3.226728560957733e-05, |
|
"loss": 0.7203, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.3556945326623345, |
|
"grad_norm": 0.9132680892944336, |
|
"learning_rate": 3.221842169557782e-05, |
|
"loss": 0.7248, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.35667171544437387, |
|
"grad_norm": 0.7861882448196411, |
|
"learning_rate": 3.216955778157831e-05, |
|
"loss": 0.7118, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.35764889822641327, |
|
"grad_norm": 1.2251768112182617, |
|
"learning_rate": 3.2120693867578795e-05, |
|
"loss": 0.7304, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.3586260810084526, |
|
"grad_norm": 1.1924370527267456, |
|
"learning_rate": 3.207182995357928e-05, |
|
"loss": 0.7394, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.359603263790492, |
|
"grad_norm": 0.7275030016899109, |
|
"learning_rate": 3.202296603957977e-05, |
|
"loss": 0.7399, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.3605804465725314, |
|
"grad_norm": 0.7406324148178101, |
|
"learning_rate": 3.1974102125580266e-05, |
|
"loss": 0.7432, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.36155762935457075, |
|
"grad_norm": 1.0701793432235718, |
|
"learning_rate": 3.192523821158075e-05, |
|
"loss": 0.7099, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.36253481213661015, |
|
"grad_norm": 0.7077426314353943, |
|
"learning_rate": 3.1876374297581235e-05, |
|
"loss": 0.7127, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.36351199491864955, |
|
"grad_norm": 0.5806621313095093, |
|
"learning_rate": 3.1827510383581723e-05, |
|
"loss": 0.7002, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.3644891777006889, |
|
"grad_norm": 1.1311944723129272, |
|
"learning_rate": 3.177864646958221e-05, |
|
"loss": 0.6876, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.3654663604827283, |
|
"grad_norm": 0.9112023711204529, |
|
"learning_rate": 3.1729782555582706e-05, |
|
"loss": 0.7169, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.3664435432647677, |
|
"grad_norm": 0.5986848473548889, |
|
"learning_rate": 3.168091864158319e-05, |
|
"loss": 0.7294, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.36742072604680703, |
|
"grad_norm": 1.297155737876892, |
|
"learning_rate": 3.163205472758368e-05, |
|
"loss": 0.7061, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.36839790882884643, |
|
"grad_norm": 0.6597927808761597, |
|
"learning_rate": 3.158319081358417e-05, |
|
"loss": 0.7166, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.36937509161088583, |
|
"grad_norm": 0.36105087399482727, |
|
"learning_rate": 3.153432689958466e-05, |
|
"loss": 0.7017, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.3703522743929252, |
|
"grad_norm": 0.5487505197525024, |
|
"learning_rate": 3.148546298558515e-05, |
|
"loss": 0.7081, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.3713294571749646, |
|
"grad_norm": 1.5384310483932495, |
|
"learning_rate": 3.1436599071585635e-05, |
|
"loss": 0.7064, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.37230663995700397, |
|
"grad_norm": 1.0113205909729004, |
|
"learning_rate": 3.138773515758612e-05, |
|
"loss": 0.7197, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.3732838227390433, |
|
"grad_norm": 1.4755492210388184, |
|
"learning_rate": 3.133887124358662e-05, |
|
"loss": 0.755, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.3742610055210827, |
|
"grad_norm": 0.7554188370704651, |
|
"learning_rate": 3.12900073295871e-05, |
|
"loss": 0.7083, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.3752381883031221, |
|
"grad_norm": 0.7589747905731201, |
|
"learning_rate": 3.1241143415587594e-05, |
|
"loss": 0.6917, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.37621537108516145, |
|
"grad_norm": 0.485612690448761, |
|
"learning_rate": 3.119227950158808e-05, |
|
"loss": 0.7429, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.37719255386720085, |
|
"grad_norm": 0.5043421983718872, |
|
"learning_rate": 3.114341558758856e-05, |
|
"loss": 0.7217, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.37816973664924025, |
|
"grad_norm": 1.6078003644943237, |
|
"learning_rate": 3.109455167358906e-05, |
|
"loss": 0.7019, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 0.3607342839241028, |
|
"learning_rate": 3.104568775958954e-05, |
|
"loss": 0.772, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.380124102213319, |
|
"grad_norm": 1.002525806427002, |
|
"learning_rate": 3.0996823845590034e-05, |
|
"loss": 0.7213, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.3811012849953584, |
|
"grad_norm": 0.7605811357498169, |
|
"learning_rate": 3.094795993159052e-05, |
|
"loss": 0.7, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.3820784677773978, |
|
"grad_norm": 2.388939619064331, |
|
"learning_rate": 3.089909601759101e-05, |
|
"loss": 0.7307, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.38305565055943713, |
|
"grad_norm": 0.824883222579956, |
|
"learning_rate": 3.08502321035915e-05, |
|
"loss": 0.7255, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.38403283334147653, |
|
"grad_norm": 0.6755787134170532, |
|
"learning_rate": 3.0801368189591986e-05, |
|
"loss": 0.7013, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.38501001612351593, |
|
"grad_norm": 0.580859899520874, |
|
"learning_rate": 3.0752504275592474e-05, |
|
"loss": 0.7357, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.3859871989055553, |
|
"grad_norm": 0.6988548636436462, |
|
"learning_rate": 3.070364036159297e-05, |
|
"loss": 0.6902, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.38696438168759467, |
|
"grad_norm": 0.5997043251991272, |
|
"learning_rate": 3.065477644759345e-05, |
|
"loss": 0.7093, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.38794156446963407, |
|
"grad_norm": 0.7906262874603271, |
|
"learning_rate": 3.0605912533593945e-05, |
|
"loss": 0.7376, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.3889187472516734, |
|
"grad_norm": 0.7436035871505737, |
|
"learning_rate": 3.0557048619594433e-05, |
|
"loss": 0.7159, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.3898959300337128, |
|
"grad_norm": 0.6913009285926819, |
|
"learning_rate": 3.050818470559492e-05, |
|
"loss": 0.7267, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.3908731128157522, |
|
"grad_norm": 1.0030348300933838, |
|
"learning_rate": 3.045932079159541e-05, |
|
"loss": 0.7186, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.39185029559779155, |
|
"grad_norm": 0.7223851084709167, |
|
"learning_rate": 3.0410456877595894e-05, |
|
"loss": 0.7113, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.39282747837983095, |
|
"grad_norm": 1.0449798107147217, |
|
"learning_rate": 3.0361592963596386e-05, |
|
"loss": 0.6985, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.39380466116187035, |
|
"grad_norm": 0.7078452110290527, |
|
"learning_rate": 3.031272904959687e-05, |
|
"loss": 0.714, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.3947818439439097, |
|
"grad_norm": 0.5977550148963928, |
|
"learning_rate": 3.0263865135597362e-05, |
|
"loss": 0.7126, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.3957590267259491, |
|
"grad_norm": 0.6963929533958435, |
|
"learning_rate": 3.021500122159785e-05, |
|
"loss": 0.6922, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.3967362095079885, |
|
"grad_norm": 0.49735382199287415, |
|
"learning_rate": 3.016613730759834e-05, |
|
"loss": 0.6914, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.39771339229002783, |
|
"grad_norm": 0.8894415497779846, |
|
"learning_rate": 3.0117273393598826e-05, |
|
"loss": 0.6988, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.39869057507206723, |
|
"grad_norm": 0.5845156311988831, |
|
"learning_rate": 3.0068409479599317e-05, |
|
"loss": 0.705, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.39966775785410663, |
|
"grad_norm": 0.7496864199638367, |
|
"learning_rate": 3.0019545565599806e-05, |
|
"loss": 0.669, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.400644940636146, |
|
"grad_norm": 1.2446004152297974, |
|
"learning_rate": 2.9970681651600297e-05, |
|
"loss": 0.7063, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.4016221234181854, |
|
"grad_norm": 0.37521255016326904, |
|
"learning_rate": 2.992181773760078e-05, |
|
"loss": 0.6966, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.40259930620022477, |
|
"grad_norm": 0.7953245639801025, |
|
"learning_rate": 2.9872953823601273e-05, |
|
"loss": 0.6934, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.4035764889822641, |
|
"grad_norm": 0.844543993473053, |
|
"learning_rate": 2.982408990960176e-05, |
|
"loss": 0.6926, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.4045536717643035, |
|
"grad_norm": 0.5298857688903809, |
|
"learning_rate": 2.9775225995602253e-05, |
|
"loss": 0.6926, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.4055308545463429, |
|
"grad_norm": 0.6932188272476196, |
|
"learning_rate": 2.9726362081602737e-05, |
|
"loss": 0.6868, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.40650803732838225, |
|
"grad_norm": 0.7204051613807678, |
|
"learning_rate": 2.9677498167603225e-05, |
|
"loss": 0.7064, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.40748522011042165, |
|
"grad_norm": 1.0420963764190674, |
|
"learning_rate": 2.9628634253603717e-05, |
|
"loss": 0.7072, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.40846240289246105, |
|
"grad_norm": 0.4677026867866516, |
|
"learning_rate": 2.95797703396042e-05, |
|
"loss": 0.691, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.4094395856745004, |
|
"grad_norm": 0.6934903860092163, |
|
"learning_rate": 2.9530906425604693e-05, |
|
"loss": 0.6962, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.4104167684565398, |
|
"grad_norm": 0.7500805854797363, |
|
"learning_rate": 2.9482042511605178e-05, |
|
"loss": 0.708, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.4113939512385792, |
|
"grad_norm": 0.8887515664100647, |
|
"learning_rate": 2.943317859760567e-05, |
|
"loss": 0.702, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 0.39899566769599915, |
|
"learning_rate": 2.9384314683606157e-05, |
|
"loss": 0.709, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.41334831680265793, |
|
"grad_norm": 0.8467943668365479, |
|
"learning_rate": 2.933545076960665e-05, |
|
"loss": 0.6928, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.41432549958469733, |
|
"grad_norm": 0.6024282574653625, |
|
"learning_rate": 2.9286586855607133e-05, |
|
"loss": 0.6928, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.4153026823667367, |
|
"grad_norm": 0.7921658158302307, |
|
"learning_rate": 2.9237722941607625e-05, |
|
"loss": 0.6865, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.4162798651487761, |
|
"grad_norm": 0.9025784730911255, |
|
"learning_rate": 2.9188859027608113e-05, |
|
"loss": 0.6863, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.4172570479308155, |
|
"grad_norm": 0.9453756809234619, |
|
"learning_rate": 2.9139995113608604e-05, |
|
"loss": 0.6924, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.4182342307128548, |
|
"grad_norm": 0.8638947010040283, |
|
"learning_rate": 2.909113119960909e-05, |
|
"loss": 0.7011, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.4192114134948942, |
|
"grad_norm": 0.6639747619628906, |
|
"learning_rate": 2.904226728560958e-05, |
|
"loss": 0.6766, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.4201885962769336, |
|
"grad_norm": 0.7019941210746765, |
|
"learning_rate": 2.899340337161007e-05, |
|
"loss": 0.7025, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.42116577905897296, |
|
"grad_norm": 0.6988587379455566, |
|
"learning_rate": 2.8944539457610553e-05, |
|
"loss": 0.6768, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.42214296184101235, |
|
"grad_norm": 0.5817476511001587, |
|
"learning_rate": 2.8895675543611045e-05, |
|
"loss": 0.702, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.42312014462305175, |
|
"grad_norm": 0.4533466398715973, |
|
"learning_rate": 2.8846811629611533e-05, |
|
"loss": 0.6949, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.4240973274050911, |
|
"grad_norm": 0.6197069883346558, |
|
"learning_rate": 2.8797947715612024e-05, |
|
"loss": 0.684, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.4250745101871305, |
|
"grad_norm": 1.693144679069519, |
|
"learning_rate": 2.874908380161251e-05, |
|
"loss": 0.7201, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.4260516929691699, |
|
"grad_norm": 1.1772024631500244, |
|
"learning_rate": 2.8700219887613e-05, |
|
"loss": 0.6936, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.4270288757512093, |
|
"grad_norm": 0.5265709161758423, |
|
"learning_rate": 2.8651355973613485e-05, |
|
"loss": 0.6994, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.42800605853324863, |
|
"grad_norm": 0.8301248550415039, |
|
"learning_rate": 2.8602492059613976e-05, |
|
"loss": 0.6968, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.42898324131528803, |
|
"grad_norm": 1.2123380899429321, |
|
"learning_rate": 2.8553628145614464e-05, |
|
"loss": 0.7013, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.42996042409732743, |
|
"grad_norm": 1.3780418634414673, |
|
"learning_rate": 2.8504764231614956e-05, |
|
"loss": 0.6826, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.4309376068793668, |
|
"grad_norm": 0.6333886981010437, |
|
"learning_rate": 2.845590031761544e-05, |
|
"loss": 0.6842, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.4319147896614062, |
|
"grad_norm": 0.5353469252586365, |
|
"learning_rate": 2.8407036403615932e-05, |
|
"loss": 0.6751, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.43289197244344557, |
|
"grad_norm": 0.9482343792915344, |
|
"learning_rate": 2.835817248961642e-05, |
|
"loss": 0.6961, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.4338691552254849, |
|
"grad_norm": 0.7306164503097534, |
|
"learning_rate": 2.830930857561691e-05, |
|
"loss": 0.6829, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.4348463380075243, |
|
"grad_norm": 0.9290406107902527, |
|
"learning_rate": 2.8260444661617396e-05, |
|
"loss": 0.7109, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.4358235207895637, |
|
"grad_norm": 0.5903436541557312, |
|
"learning_rate": 2.8211580747617884e-05, |
|
"loss": 0.7144, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.43680070357160306, |
|
"grad_norm": 0.7370823621749878, |
|
"learning_rate": 2.8162716833618376e-05, |
|
"loss": 0.6858, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.43777788635364245, |
|
"grad_norm": 0.5477197766304016, |
|
"learning_rate": 2.811385291961886e-05, |
|
"loss": 0.6951, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.43875506913568185, |
|
"grad_norm": 0.8994666934013367, |
|
"learning_rate": 2.8064989005619352e-05, |
|
"loss": 0.705, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.4397322519177212, |
|
"grad_norm": 1.171186089515686, |
|
"learning_rate": 2.8016125091619836e-05, |
|
"loss": 0.6812, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.4407094346997606, |
|
"grad_norm": 0.6986414194107056, |
|
"learning_rate": 2.796726117762033e-05, |
|
"loss": 0.6729, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.4416866174818, |
|
"grad_norm": 0.8245409727096558, |
|
"learning_rate": 2.7918397263620816e-05, |
|
"loss": 0.6679, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.44266380026383934, |
|
"grad_norm": 0.8805913925170898, |
|
"learning_rate": 2.7869533349621307e-05, |
|
"loss": 0.7042, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.44364098304587873, |
|
"grad_norm": 0.7037094831466675, |
|
"learning_rate": 2.7820669435621792e-05, |
|
"loss": 0.6988, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.44461816582791813, |
|
"grad_norm": 1.118363380432129, |
|
"learning_rate": 2.7771805521622284e-05, |
|
"loss": 0.6866, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.4455953486099575, |
|
"grad_norm": 1.0665768384933472, |
|
"learning_rate": 2.772294160762277e-05, |
|
"loss": 0.6732, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.4465725313919969, |
|
"grad_norm": 0.7593882083892822, |
|
"learning_rate": 2.7674077693623263e-05, |
|
"loss": 0.6951, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.4475497141740363, |
|
"grad_norm": 2.3182179927825928, |
|
"learning_rate": 2.7625213779623748e-05, |
|
"loss": 0.6695, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.4485268969560756, |
|
"grad_norm": 1.2548315525054932, |
|
"learning_rate": 2.757634986562424e-05, |
|
"loss": 0.7128, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.449504079738115, |
|
"grad_norm": 0.8613176941871643, |
|
"learning_rate": 2.7527485951624727e-05, |
|
"loss": 0.6956, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.4504812625201544, |
|
"grad_norm": 0.946165919303894, |
|
"learning_rate": 2.7478622037625212e-05, |
|
"loss": 0.7177, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.45145844530219376, |
|
"grad_norm": 0.9122072458267212, |
|
"learning_rate": 2.7429758123625703e-05, |
|
"loss": 0.7094, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.45243562808423315, |
|
"grad_norm": 0.8797391057014465, |
|
"learning_rate": 2.738089420962619e-05, |
|
"loss": 0.7118, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.45341281086627255, |
|
"grad_norm": 0.5321417450904846, |
|
"learning_rate": 2.7332030295626683e-05, |
|
"loss": 0.6923, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.4543899936483119, |
|
"grad_norm": 1.0878016948699951, |
|
"learning_rate": 2.7283166381627168e-05, |
|
"loss": 0.72, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.4553671764303513, |
|
"grad_norm": 0.8534865975379944, |
|
"learning_rate": 2.723430246762766e-05, |
|
"loss": 0.6945, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.4563443592123907, |
|
"grad_norm": 0.8475703597068787, |
|
"learning_rate": 2.7185438553628144e-05, |
|
"loss": 0.6891, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.45732154199443004, |
|
"grad_norm": 0.7100959420204163, |
|
"learning_rate": 2.713657463962864e-05, |
|
"loss": 0.6605, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.45829872477646943, |
|
"grad_norm": 0.6616931557655334, |
|
"learning_rate": 2.7087710725629123e-05, |
|
"loss": 0.6678, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.45927590755850883, |
|
"grad_norm": 1.2114359140396118, |
|
"learning_rate": 2.7038846811629615e-05, |
|
"loss": 0.6525, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.4602530903405482, |
|
"grad_norm": 0.4216634929180145, |
|
"learning_rate": 2.69899828976301e-05, |
|
"loss": 0.6881, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.4612302731225876, |
|
"grad_norm": 0.7598534822463989, |
|
"learning_rate": 2.694111898363059e-05, |
|
"loss": 0.6555, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.462207455904627, |
|
"grad_norm": 0.9792212843894958, |
|
"learning_rate": 2.689225506963108e-05, |
|
"loss": 0.6866, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.4631846386866663, |
|
"grad_norm": 0.5867584943771362, |
|
"learning_rate": 2.684339115563157e-05, |
|
"loss": 0.6541, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.4641618214687057, |
|
"grad_norm": 0.8288137912750244, |
|
"learning_rate": 2.6794527241632055e-05, |
|
"loss": 0.7057, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.4651390042507451, |
|
"grad_norm": 1.5305638313293457, |
|
"learning_rate": 2.6745663327632543e-05, |
|
"loss": 0.6752, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.46611618703278446, |
|
"grad_norm": 1.0784820318222046, |
|
"learning_rate": 2.6696799413633035e-05, |
|
"loss": 0.7041, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.46709336981482386, |
|
"grad_norm": 0.7708161473274231, |
|
"learning_rate": 2.664793549963352e-05, |
|
"loss": 0.6766, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.46807055259686325, |
|
"grad_norm": 0.7639223337173462, |
|
"learning_rate": 2.659907158563401e-05, |
|
"loss": 0.6553, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.4690477353789026, |
|
"grad_norm": 0.4256194233894348, |
|
"learning_rate": 2.65502076716345e-05, |
|
"loss": 0.6921, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.470024918160942, |
|
"grad_norm": 1.2620900869369507, |
|
"learning_rate": 2.650134375763499e-05, |
|
"loss": 0.6945, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.4710021009429814, |
|
"grad_norm": 0.7683165073394775, |
|
"learning_rate": 2.6452479843635475e-05, |
|
"loss": 0.6594, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.4719792837250208, |
|
"grad_norm": 0.784582257270813, |
|
"learning_rate": 2.6403615929635966e-05, |
|
"loss": 0.6877, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.47295646650706014, |
|
"grad_norm": 0.7894740104675293, |
|
"learning_rate": 2.635475201563645e-05, |
|
"loss": 0.6944, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"grad_norm": 0.6949831247329712, |
|
"learning_rate": 2.6305888101636942e-05, |
|
"loss": 0.6625, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.47491083207113893, |
|
"grad_norm": 0.5648496747016907, |
|
"learning_rate": 2.625702418763743e-05, |
|
"loss": 0.6489, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.4758880148531783, |
|
"grad_norm": 0.8879817128181458, |
|
"learning_rate": 2.6208160273637922e-05, |
|
"loss": 0.6465, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.4768651976352177, |
|
"grad_norm": 0.5845817923545837, |
|
"learning_rate": 2.6159296359638407e-05, |
|
"loss": 0.7044, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.4778423804172571, |
|
"grad_norm": 0.8040775060653687, |
|
"learning_rate": 2.6110432445638898e-05, |
|
"loss": 0.6745, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.4788195631992964, |
|
"grad_norm": 0.5439351201057434, |
|
"learning_rate": 2.6061568531639386e-05, |
|
"loss": 0.6924, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.4797967459813358, |
|
"grad_norm": 1.1411272287368774, |
|
"learning_rate": 2.601270461763987e-05, |
|
"loss": 0.6834, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.4807739287633752, |
|
"grad_norm": 0.7273046374320984, |
|
"learning_rate": 2.5963840703640362e-05, |
|
"loss": 0.6547, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.48175111154541456, |
|
"grad_norm": 0.9065064787864685, |
|
"learning_rate": 2.591497678964085e-05, |
|
"loss": 0.6792, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.48272829432745396, |
|
"grad_norm": 0.6722708344459534, |
|
"learning_rate": 2.5866112875641342e-05, |
|
"loss": 0.6913, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.48370547710949335, |
|
"grad_norm": 0.6576828360557556, |
|
"learning_rate": 2.5817248961641826e-05, |
|
"loss": 0.6741, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.4846826598915327, |
|
"grad_norm": 0.46869999170303345, |
|
"learning_rate": 2.5768385047642318e-05, |
|
"loss": 0.6729, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.4856598426735721, |
|
"grad_norm": 0.735565185546875, |
|
"learning_rate": 2.5719521133642806e-05, |
|
"loss": 0.6781, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.4866370254556115, |
|
"grad_norm": 0.6392993927001953, |
|
"learning_rate": 2.5670657219643297e-05, |
|
"loss": 0.6824, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.48761420823765084, |
|
"grad_norm": 3.2004761695861816, |
|
"learning_rate": 2.5621793305643782e-05, |
|
"loss": 0.6862, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.48859139101969024, |
|
"grad_norm": 0.6201328635215759, |
|
"learning_rate": 2.5572929391644274e-05, |
|
"loss": 0.664, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.48956857380172963, |
|
"grad_norm": 1.179991364479065, |
|
"learning_rate": 2.5524065477644758e-05, |
|
"loss": 0.6841, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.490545756583769, |
|
"grad_norm": 0.942451000213623, |
|
"learning_rate": 2.547520156364525e-05, |
|
"loss": 0.6555, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.4915229393658084, |
|
"grad_norm": 1.1190769672393799, |
|
"learning_rate": 2.5426337649645738e-05, |
|
"loss": 0.673, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.4925001221478478, |
|
"grad_norm": 0.712053656578064, |
|
"learning_rate": 2.537747373564623e-05, |
|
"loss": 0.6849, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.4934773049298871, |
|
"grad_norm": 1.3936710357666016, |
|
"learning_rate": 2.5328609821646714e-05, |
|
"loss": 0.6751, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.4944544877119265, |
|
"grad_norm": 0.5909391045570374, |
|
"learning_rate": 2.5279745907647205e-05, |
|
"loss": 0.683, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.4954316704939659, |
|
"grad_norm": 0.8883010149002075, |
|
"learning_rate": 2.5230881993647693e-05, |
|
"loss": 0.6806, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.49640885327600526, |
|
"grad_norm": 0.7069185376167297, |
|
"learning_rate": 2.5182018079648178e-05, |
|
"loss": 0.6779, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.49738603605804466, |
|
"grad_norm": 0.7906535267829895, |
|
"learning_rate": 2.513315416564867e-05, |
|
"loss": 0.663, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.49836321884008405, |
|
"grad_norm": 1.8775051832199097, |
|
"learning_rate": 2.5084290251649158e-05, |
|
"loss": 0.6924, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.4993404016221234, |
|
"grad_norm": 0.4028649628162384, |
|
"learning_rate": 2.503542633764965e-05, |
|
"loss": 0.6611, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.5003175844041629, |
|
"grad_norm": 0.8514829277992249, |
|
"learning_rate": 2.4986562423650137e-05, |
|
"loss": 0.6423, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.5012947671862021, |
|
"grad_norm": 0.5659759044647217, |
|
"learning_rate": 2.4937698509650625e-05, |
|
"loss": 0.6978, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.5022719499682415, |
|
"grad_norm": 0.8396779298782349, |
|
"learning_rate": 2.488883459565111e-05, |
|
"loss": 0.6593, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.5032491327502809, |
|
"grad_norm": 0.6824951767921448, |
|
"learning_rate": 2.48399706816516e-05, |
|
"loss": 0.6839, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.5042263155323203, |
|
"grad_norm": 0.6299941539764404, |
|
"learning_rate": 2.479110676765209e-05, |
|
"loss": 0.6743, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.5052034983143597, |
|
"grad_norm": 1.2409921884536743, |
|
"learning_rate": 2.4742242853652577e-05, |
|
"loss": 0.6477, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.5061806810963991, |
|
"grad_norm": 0.668393075466156, |
|
"learning_rate": 2.4693378939653065e-05, |
|
"loss": 0.6568, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.5071578638784384, |
|
"grad_norm": 0.5376803278923035, |
|
"learning_rate": 2.4644515025653557e-05, |
|
"loss": 0.6476, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.5081350466604778, |
|
"grad_norm": 1.710288166999817, |
|
"learning_rate": 2.4595651111654045e-05, |
|
"loss": 0.6404, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.5091122294425172, |
|
"grad_norm": 0.6142415404319763, |
|
"learning_rate": 2.4546787197654533e-05, |
|
"loss": 0.7026, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.5100894122245566, |
|
"grad_norm": 0.4976397454738617, |
|
"learning_rate": 2.449792328365502e-05, |
|
"loss": 0.6659, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.511066595006596, |
|
"grad_norm": 0.8558853268623352, |
|
"learning_rate": 2.4449059369655513e-05, |
|
"loss": 0.67, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.5120437777886354, |
|
"grad_norm": 0.620583713054657, |
|
"learning_rate": 2.4400195455656e-05, |
|
"loss": 0.6596, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.5130209605706747, |
|
"grad_norm": 0.8520305752754211, |
|
"learning_rate": 2.435133154165649e-05, |
|
"loss": 0.653, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.5139981433527141, |
|
"grad_norm": 0.43671169877052307, |
|
"learning_rate": 2.4302467627656977e-05, |
|
"loss": 0.6554, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.5149753261347535, |
|
"grad_norm": 0.5502797961235046, |
|
"learning_rate": 2.4253603713657465e-05, |
|
"loss": 0.6432, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.5159525089167929, |
|
"grad_norm": 0.918704628944397, |
|
"learning_rate": 2.4204739799657956e-05, |
|
"loss": 0.6604, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.5169296916988323, |
|
"grad_norm": 0.44583848118782043, |
|
"learning_rate": 2.415587588565844e-05, |
|
"loss": 0.6736, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.5179068744808717, |
|
"grad_norm": 0.8312250971794128, |
|
"learning_rate": 2.410701197165893e-05, |
|
"loss": 0.6645, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.518884057262911, |
|
"grad_norm": 0.39499637484550476, |
|
"learning_rate": 2.4058148057659417e-05, |
|
"loss": 0.6876, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.5198612400449504, |
|
"grad_norm": 0.5650041699409485, |
|
"learning_rate": 2.400928414365991e-05, |
|
"loss": 0.691, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.5208384228269898, |
|
"grad_norm": 0.7247036099433899, |
|
"learning_rate": 2.3960420229660397e-05, |
|
"loss": 0.6367, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.5218156056090292, |
|
"grad_norm": 0.8500406742095947, |
|
"learning_rate": 2.3911556315660885e-05, |
|
"loss": 0.6678, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.5227927883910686, |
|
"grad_norm": 1.2467963695526123, |
|
"learning_rate": 2.3862692401661373e-05, |
|
"loss": 0.6439, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.523769971173108, |
|
"grad_norm": 1.0069133043289185, |
|
"learning_rate": 2.3813828487661864e-05, |
|
"loss": 0.6555, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.5247471539551473, |
|
"grad_norm": 0.9213836193084717, |
|
"learning_rate": 2.3764964573662352e-05, |
|
"loss": 0.6374, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.5257243367371867, |
|
"grad_norm": 0.7063928246498108, |
|
"learning_rate": 2.371610065966284e-05, |
|
"loss": 0.6473, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.5267015195192261, |
|
"grad_norm": 0.7876357436180115, |
|
"learning_rate": 2.366723674566333e-05, |
|
"loss": 0.6599, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.5276787023012655, |
|
"grad_norm": 0.5371726751327515, |
|
"learning_rate": 2.3618372831663816e-05, |
|
"loss": 0.6569, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.5286558850833049, |
|
"grad_norm": 0.6501371264457703, |
|
"learning_rate": 2.3569508917664308e-05, |
|
"loss": 0.6502, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.5296330678653443, |
|
"grad_norm": 1.9818251132965088, |
|
"learning_rate": 2.3520645003664796e-05, |
|
"loss": 0.6628, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.5306102506473835, |
|
"grad_norm": 0.6198662519454956, |
|
"learning_rate": 2.3471781089665284e-05, |
|
"loss": 0.6771, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.5315874334294229, |
|
"grad_norm": 0.70624840259552, |
|
"learning_rate": 2.3422917175665772e-05, |
|
"loss": 0.6685, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.5325646162114623, |
|
"grad_norm": 0.5182805061340332, |
|
"learning_rate": 2.337405326166626e-05, |
|
"loss": 0.6651, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.5335417989935017, |
|
"grad_norm": 1.0862709283828735, |
|
"learning_rate": 2.3325189347666748e-05, |
|
"loss": 0.668, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.5345189817755411, |
|
"grad_norm": 0.5830691456794739, |
|
"learning_rate": 2.3276325433667236e-05, |
|
"loss": 0.67, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.5354961645575805, |
|
"grad_norm": 0.5614120960235596, |
|
"learning_rate": 2.3227461519667724e-05, |
|
"loss": 0.6466, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.5364733473396198, |
|
"grad_norm": 0.6346180438995361, |
|
"learning_rate": 2.3178597605668216e-05, |
|
"loss": 0.6784, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.5374505301216592, |
|
"grad_norm": 0.5453216433525085, |
|
"learning_rate": 2.3129733691668704e-05, |
|
"loss": 0.6507, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.5384277129036986, |
|
"grad_norm": 0.8145617246627808, |
|
"learning_rate": 2.3080869777669192e-05, |
|
"loss": 0.6874, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.539404895685738, |
|
"grad_norm": 0.8334397673606873, |
|
"learning_rate": 2.303200586366968e-05, |
|
"loss": 0.6772, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.5403820784677774, |
|
"grad_norm": 0.5468283295631409, |
|
"learning_rate": 2.298314194967017e-05, |
|
"loss": 0.6448, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.5413592612498168, |
|
"grad_norm": 0.8369360566139221, |
|
"learning_rate": 2.293427803567066e-05, |
|
"loss": 0.6593, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.5423364440318562, |
|
"grad_norm": 0.498793363571167, |
|
"learning_rate": 2.2885414121671148e-05, |
|
"loss": 0.6236, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.5433136268138955, |
|
"grad_norm": 0.6096756458282471, |
|
"learning_rate": 2.2836550207671636e-05, |
|
"loss": 0.6766, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.5442908095959349, |
|
"grad_norm": 0.8249727487564087, |
|
"learning_rate": 2.2787686293672124e-05, |
|
"loss": 0.654, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.5452679923779743, |
|
"grad_norm": 0.9821385145187378, |
|
"learning_rate": 2.2738822379672615e-05, |
|
"loss": 0.6633, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.5462451751600137, |
|
"grad_norm": 1.025420069694519, |
|
"learning_rate": 2.26899584656731e-05, |
|
"loss": 0.6691, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.5472223579420531, |
|
"grad_norm": 1.1872769594192505, |
|
"learning_rate": 2.2641094551673588e-05, |
|
"loss": 0.6811, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.5481995407240925, |
|
"grad_norm": 0.6862273812294006, |
|
"learning_rate": 2.259223063767408e-05, |
|
"loss": 0.6503, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.5491767235061318, |
|
"grad_norm": 1.9515796899795532, |
|
"learning_rate": 2.2543366723674567e-05, |
|
"loss": 0.6672, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.5501539062881712, |
|
"grad_norm": 1.5116077661514282, |
|
"learning_rate": 2.2494502809675055e-05, |
|
"loss": 0.6714, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.5511310890702106, |
|
"grad_norm": 0.710858166217804, |
|
"learning_rate": 2.2445638895675544e-05, |
|
"loss": 0.6577, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.55210827185225, |
|
"grad_norm": 0.6870605945587158, |
|
"learning_rate": 2.239677498167603e-05, |
|
"loss": 0.6655, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.5530854546342894, |
|
"grad_norm": 0.802883505821228, |
|
"learning_rate": 2.2347911067676523e-05, |
|
"loss": 0.6812, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.5540626374163288, |
|
"grad_norm": 1.244555115699768, |
|
"learning_rate": 2.229904715367701e-05, |
|
"loss": 0.655, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.5550398201983681, |
|
"grad_norm": 0.7662067413330078, |
|
"learning_rate": 2.22501832396775e-05, |
|
"loss": 0.6867, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.5560170029804075, |
|
"grad_norm": 0.9172037839889526, |
|
"learning_rate": 2.2201319325677987e-05, |
|
"loss": 0.6427, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.5569941857624469, |
|
"grad_norm": 0.8700697422027588, |
|
"learning_rate": 2.215245541167848e-05, |
|
"loss": 0.6959, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.5579713685444863, |
|
"grad_norm": 1.1184202432632446, |
|
"learning_rate": 2.2103591497678967e-05, |
|
"loss": 0.6601, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.5589485513265257, |
|
"grad_norm": 1.1001787185668945, |
|
"learning_rate": 2.2054727583679455e-05, |
|
"loss": 0.6753, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.559925734108565, |
|
"grad_norm": 0.29295894503593445, |
|
"learning_rate": 2.2005863669679943e-05, |
|
"loss": 0.625, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.5609029168906043, |
|
"grad_norm": 0.5778409242630005, |
|
"learning_rate": 2.195699975568043e-05, |
|
"loss": 0.6554, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.5618800996726437, |
|
"grad_norm": 0.8341584801673889, |
|
"learning_rate": 2.190813584168092e-05, |
|
"loss": 0.6324, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.5628572824546831, |
|
"grad_norm": 1.329548716545105, |
|
"learning_rate": 2.1859271927681407e-05, |
|
"loss": 0.6657, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.5638344652367225, |
|
"grad_norm": 0.6559785604476929, |
|
"learning_rate": 2.1810408013681895e-05, |
|
"loss": 0.6411, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.5648116480187619, |
|
"grad_norm": 1.1021350622177124, |
|
"learning_rate": 2.1761544099682387e-05, |
|
"loss": 0.6363, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.5657888308008013, |
|
"grad_norm": 1.0015547275543213, |
|
"learning_rate": 2.1712680185682875e-05, |
|
"loss": 0.632, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.5667660135828406, |
|
"grad_norm": 0.7394452691078186, |
|
"learning_rate": 2.1663816271683363e-05, |
|
"loss": 0.6882, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.56774319636488, |
|
"grad_norm": 1.0177232027053833, |
|
"learning_rate": 2.161495235768385e-05, |
|
"loss": 0.659, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 1.182385802268982, |
|
"learning_rate": 2.156608844368434e-05, |
|
"loss": 0.6304, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.5696975619289588, |
|
"grad_norm": 0.6992839574813843, |
|
"learning_rate": 2.151722452968483e-05, |
|
"loss": 0.6419, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.5706747447109982, |
|
"grad_norm": 1.127772331237793, |
|
"learning_rate": 2.146836061568532e-05, |
|
"loss": 0.6762, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.5716519274930376, |
|
"grad_norm": 1.0480372905731201, |
|
"learning_rate": 2.1419496701685806e-05, |
|
"loss": 0.649, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.5726291102750769, |
|
"grad_norm": 0.62301105260849, |
|
"learning_rate": 2.1370632787686295e-05, |
|
"loss": 0.6423, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.5736062930571163, |
|
"grad_norm": 0.7996447086334229, |
|
"learning_rate": 2.1321768873686786e-05, |
|
"loss": 0.6675, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.5745834758391557, |
|
"grad_norm": 0.8735845685005188, |
|
"learning_rate": 2.1272904959687274e-05, |
|
"loss": 0.6251, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.5755606586211951, |
|
"grad_norm": 1.0168455839157104, |
|
"learning_rate": 2.1224041045687762e-05, |
|
"loss": 0.6623, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.5765378414032345, |
|
"grad_norm": 0.7308356165885925, |
|
"learning_rate": 2.1175177131688247e-05, |
|
"loss": 0.6613, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.5775150241852739, |
|
"grad_norm": 1.2486464977264404, |
|
"learning_rate": 2.1126313217688738e-05, |
|
"loss": 0.6424, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.5784922069673132, |
|
"grad_norm": 0.8921827077865601, |
|
"learning_rate": 2.1077449303689226e-05, |
|
"loss": 0.6403, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.5794693897493526, |
|
"grad_norm": 0.5246706604957581, |
|
"learning_rate": 2.1028585389689714e-05, |
|
"loss": 0.6494, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.580446572531392, |
|
"grad_norm": 0.8651568293571472, |
|
"learning_rate": 2.0979721475690202e-05, |
|
"loss": 0.6352, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.5814237553134314, |
|
"grad_norm": 0.9502151608467102, |
|
"learning_rate": 2.093085756169069e-05, |
|
"loss": 0.6661, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.5824009380954708, |
|
"grad_norm": 0.6827490925788879, |
|
"learning_rate": 2.0881993647691182e-05, |
|
"loss": 0.625, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.5833781208775102, |
|
"grad_norm": 0.8105266690254211, |
|
"learning_rate": 2.083312973369167e-05, |
|
"loss": 0.6699, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.5843553036595496, |
|
"grad_norm": 1.005845308303833, |
|
"learning_rate": 2.0784265819692158e-05, |
|
"loss": 0.6528, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.5853324864415889, |
|
"grad_norm": 0.8736119270324707, |
|
"learning_rate": 2.0735401905692646e-05, |
|
"loss": 0.6691, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.5863096692236283, |
|
"grad_norm": 0.8782946467399597, |
|
"learning_rate": 2.0686537991693138e-05, |
|
"loss": 0.6677, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.5872868520056677, |
|
"grad_norm": 0.7457369565963745, |
|
"learning_rate": 2.0637674077693626e-05, |
|
"loss": 0.6323, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.5882640347877071, |
|
"grad_norm": 1.0230743885040283, |
|
"learning_rate": 2.0588810163694114e-05, |
|
"loss": 0.6521, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.5892412175697465, |
|
"grad_norm": 0.8328123688697815, |
|
"learning_rate": 2.0539946249694602e-05, |
|
"loss": 0.6356, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.5902184003517859, |
|
"grad_norm": 0.7374850511550903, |
|
"learning_rate": 2.049108233569509e-05, |
|
"loss": 0.6669, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.5911955831338251, |
|
"grad_norm": 0.505228579044342, |
|
"learning_rate": 2.0442218421695578e-05, |
|
"loss": 0.6734, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.5921727659158645, |
|
"grad_norm": 0.8307722210884094, |
|
"learning_rate": 2.0393354507696066e-05, |
|
"loss": 0.657, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.5931499486979039, |
|
"grad_norm": 0.8867704272270203, |
|
"learning_rate": 2.0344490593696554e-05, |
|
"loss": 0.6407, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.5941271314799433, |
|
"grad_norm": 0.716373085975647, |
|
"learning_rate": 2.0295626679697045e-05, |
|
"loss": 0.6428, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.5951043142619827, |
|
"grad_norm": 0.5812042355537415, |
|
"learning_rate": 2.0246762765697534e-05, |
|
"loss": 0.63, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.5960814970440221, |
|
"grad_norm": 1.0057129859924316, |
|
"learning_rate": 2.019789885169802e-05, |
|
"loss": 0.6161, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.5970586798260614, |
|
"grad_norm": 0.6143211126327515, |
|
"learning_rate": 2.014903493769851e-05, |
|
"loss": 0.6454, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.5980358626081008, |
|
"grad_norm": 1.038710594177246, |
|
"learning_rate": 2.0100171023698998e-05, |
|
"loss": 0.6701, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.5990130453901402, |
|
"grad_norm": 0.6891298294067383, |
|
"learning_rate": 2.005130710969949e-05, |
|
"loss": 0.6666, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.5999902281721796, |
|
"grad_norm": 0.7872188091278076, |
|
"learning_rate": 2.0002443195699977e-05, |
|
"loss": 0.6357, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.600967410954219, |
|
"grad_norm": 1.2167768478393555, |
|
"learning_rate": 1.9953579281700465e-05, |
|
"loss": 0.6686, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.6019445937362584, |
|
"grad_norm": 1.0418341159820557, |
|
"learning_rate": 1.9904715367700953e-05, |
|
"loss": 0.6356, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.6029217765182977, |
|
"grad_norm": 0.6209270358085632, |
|
"learning_rate": 1.9855851453701445e-05, |
|
"loss": 0.657, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.6038989593003371, |
|
"grad_norm": 0.8585149645805359, |
|
"learning_rate": 1.9806987539701933e-05, |
|
"loss": 0.6157, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.6048761420823765, |
|
"grad_norm": 0.5286767482757568, |
|
"learning_rate": 1.975812362570242e-05, |
|
"loss": 0.6734, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.6058533248644159, |
|
"grad_norm": 0.6499518156051636, |
|
"learning_rate": 1.9709259711702906e-05, |
|
"loss": 0.6545, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.6068305076464553, |
|
"grad_norm": 1.4340311288833618, |
|
"learning_rate": 1.9660395797703397e-05, |
|
"loss": 0.6402, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.6078076904284947, |
|
"grad_norm": 0.4783228039741516, |
|
"learning_rate": 1.9611531883703885e-05, |
|
"loss": 0.6495, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.608784873210534, |
|
"grad_norm": 0.6510328054428101, |
|
"learning_rate": 1.9562667969704373e-05, |
|
"loss": 0.6398, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.6097620559925734, |
|
"grad_norm": 0.7298358082771301, |
|
"learning_rate": 1.951380405570486e-05, |
|
"loss": 0.6406, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.6107392387746128, |
|
"grad_norm": 0.7467713952064514, |
|
"learning_rate": 1.9464940141705353e-05, |
|
"loss": 0.6618, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.6117164215566522, |
|
"grad_norm": 1.1706078052520752, |
|
"learning_rate": 1.941607622770584e-05, |
|
"loss": 0.6603, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.6126936043386916, |
|
"grad_norm": 1.9863495826721191, |
|
"learning_rate": 1.936721231370633e-05, |
|
"loss": 0.628, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.613670787120731, |
|
"grad_norm": 1.1297212839126587, |
|
"learning_rate": 1.9318348399706817e-05, |
|
"loss": 0.6198, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.6146479699027703, |
|
"grad_norm": 0.6895560026168823, |
|
"learning_rate": 1.9269484485707305e-05, |
|
"loss": 0.654, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.6156251526848097, |
|
"grad_norm": 0.5572859644889832, |
|
"learning_rate": 1.9220620571707796e-05, |
|
"loss": 0.6237, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.6166023354668491, |
|
"grad_norm": 1.7625269889831543, |
|
"learning_rate": 1.9171756657708284e-05, |
|
"loss": 0.6615, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.6175795182488885, |
|
"grad_norm": 0.9473828673362732, |
|
"learning_rate": 1.9122892743708773e-05, |
|
"loss": 0.624, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.6185567010309279, |
|
"grad_norm": 1.6622077226638794, |
|
"learning_rate": 1.907402882970926e-05, |
|
"loss": 0.648, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.6195338838129673, |
|
"grad_norm": 0.889667809009552, |
|
"learning_rate": 1.9025164915709752e-05, |
|
"loss": 0.6321, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.6205110665950065, |
|
"grad_norm": 0.7613341212272644, |
|
"learning_rate": 1.8976301001710237e-05, |
|
"loss": 0.637, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.6214882493770459, |
|
"grad_norm": 0.9912586212158203, |
|
"learning_rate": 1.8927437087710725e-05, |
|
"loss": 0.6422, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.6224654321590853, |
|
"grad_norm": 0.7905563712120056, |
|
"learning_rate": 1.8878573173711213e-05, |
|
"loss": 0.6362, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.6234426149411247, |
|
"grad_norm": 0.4368293881416321, |
|
"learning_rate": 1.8829709259711704e-05, |
|
"loss": 0.6472, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.6244197977231641, |
|
"grad_norm": 0.8466482758522034, |
|
"learning_rate": 1.8780845345712192e-05, |
|
"loss": 0.673, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.6253969805052035, |
|
"grad_norm": 1.4137593507766724, |
|
"learning_rate": 1.873198143171268e-05, |
|
"loss": 0.6382, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.6263741632872428, |
|
"grad_norm": 1.7590171098709106, |
|
"learning_rate": 1.868311751771317e-05, |
|
"loss": 0.6421, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.6273513460692822, |
|
"grad_norm": 0.7667103409767151, |
|
"learning_rate": 1.863425360371366e-05, |
|
"loss": 0.6448, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.6283285288513216, |
|
"grad_norm": 1.0524508953094482, |
|
"learning_rate": 1.8585389689714148e-05, |
|
"loss": 0.6491, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.629305711633361, |
|
"grad_norm": 0.6090672612190247, |
|
"learning_rate": 1.8536525775714636e-05, |
|
"loss": 0.6416, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.6302828944154004, |
|
"grad_norm": 0.5970349311828613, |
|
"learning_rate": 1.8487661861715124e-05, |
|
"loss": 0.6393, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.6312600771974398, |
|
"grad_norm": 0.9564999341964722, |
|
"learning_rate": 1.8438797947715612e-05, |
|
"loss": 0.6656, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.6322372599794792, |
|
"grad_norm": 1.319643259048462, |
|
"learning_rate": 1.8389934033716104e-05, |
|
"loss": 0.6372, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.6332144427615185, |
|
"grad_norm": 1.0311692953109741, |
|
"learning_rate": 1.8341070119716592e-05, |
|
"loss": 0.6377, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.6341916255435579, |
|
"grad_norm": 0.5185050964355469, |
|
"learning_rate": 1.829220620571708e-05, |
|
"loss": 0.6489, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.6351688083255973, |
|
"grad_norm": 1.0611315965652466, |
|
"learning_rate": 1.8243342291717564e-05, |
|
"loss": 0.6262, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.6361459911076367, |
|
"grad_norm": 0.5177842974662781, |
|
"learning_rate": 1.8194478377718056e-05, |
|
"loss": 0.6424, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.6371231738896761, |
|
"grad_norm": 0.6148577928543091, |
|
"learning_rate": 1.8145614463718544e-05, |
|
"loss": 0.6402, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.6381003566717155, |
|
"grad_norm": 0.686576247215271, |
|
"learning_rate": 1.8096750549719032e-05, |
|
"loss": 0.6361, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.6390775394537548, |
|
"grad_norm": 1.5292381048202515, |
|
"learning_rate": 1.804788663571952e-05, |
|
"loss": 0.6263, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.6400547222357942, |
|
"grad_norm": 0.7201911807060242, |
|
"learning_rate": 1.799902272172001e-05, |
|
"loss": 0.6402, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.6410319050178336, |
|
"grad_norm": 0.7407404184341431, |
|
"learning_rate": 1.79501588077205e-05, |
|
"loss": 0.6149, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.642009087799873, |
|
"grad_norm": 0.7911986708641052, |
|
"learning_rate": 1.7901294893720988e-05, |
|
"loss": 0.6273, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.6429862705819124, |
|
"grad_norm": 0.467869371175766, |
|
"learning_rate": 1.7852430979721476e-05, |
|
"loss": 0.6344, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.6439634533639518, |
|
"grad_norm": 1.0182818174362183, |
|
"learning_rate": 1.7803567065721967e-05, |
|
"loss": 0.612, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.6449406361459911, |
|
"grad_norm": 0.5325811505317688, |
|
"learning_rate": 1.7754703151722455e-05, |
|
"loss": 0.6427, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.6459178189280305, |
|
"grad_norm": 1.1324542760849, |
|
"learning_rate": 1.7705839237722943e-05, |
|
"loss": 0.6161, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.6468950017100699, |
|
"grad_norm": 0.7836804389953613, |
|
"learning_rate": 1.765697532372343e-05, |
|
"loss": 0.632, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.6478721844921093, |
|
"grad_norm": 0.6157903075218201, |
|
"learning_rate": 1.760811140972392e-05, |
|
"loss": 0.6497, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.6488493672741487, |
|
"grad_norm": 0.776150643825531, |
|
"learning_rate": 1.755924749572441e-05, |
|
"loss": 0.5929, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.6498265500561881, |
|
"grad_norm": 0.6307646036148071, |
|
"learning_rate": 1.7510383581724896e-05, |
|
"loss": 0.66, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.6508037328382273, |
|
"grad_norm": 0.5305992364883423, |
|
"learning_rate": 1.7461519667725384e-05, |
|
"loss": 0.5985, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.6517809156202667, |
|
"grad_norm": 0.6581500172615051, |
|
"learning_rate": 1.7412655753725872e-05, |
|
"loss": 0.6393, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.6527580984023061, |
|
"grad_norm": 1.0988273620605469, |
|
"learning_rate": 1.7363791839726363e-05, |
|
"loss": 0.6453, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.6537352811843455, |
|
"grad_norm": 0.6662785410881042, |
|
"learning_rate": 1.731492792572685e-05, |
|
"loss": 0.6831, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.6547124639663849, |
|
"grad_norm": 0.5156288743019104, |
|
"learning_rate": 1.726606401172734e-05, |
|
"loss": 0.647, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.6556896467484243, |
|
"grad_norm": 0.8832482695579529, |
|
"learning_rate": 1.7217200097727827e-05, |
|
"loss": 0.6263, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.6566668295304636, |
|
"grad_norm": 0.8194277882575989, |
|
"learning_rate": 1.716833618372832e-05, |
|
"loss": 0.6293, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.657644012312503, |
|
"grad_norm": 0.5544142127037048, |
|
"learning_rate": 1.7119472269728807e-05, |
|
"loss": 0.6207, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.6586211950945424, |
|
"grad_norm": 1.0161030292510986, |
|
"learning_rate": 1.7070608355729295e-05, |
|
"loss": 0.6166, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.6595983778765818, |
|
"grad_norm": 1.1273646354675293, |
|
"learning_rate": 1.7021744441729783e-05, |
|
"loss": 0.6326, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.6605755606586212, |
|
"grad_norm": 0.5743687748908997, |
|
"learning_rate": 1.697288052773027e-05, |
|
"loss": 0.5943, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.6615527434406606, |
|
"grad_norm": 0.5743625164031982, |
|
"learning_rate": 1.6924016613730763e-05, |
|
"loss": 0.6337, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.6625299262226999, |
|
"grad_norm": 0.47358232736587524, |
|
"learning_rate": 1.687515269973125e-05, |
|
"loss": 0.6272, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.6635071090047393, |
|
"grad_norm": 0.7825568318367004, |
|
"learning_rate": 1.682628878573174e-05, |
|
"loss": 0.6407, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.6644842917867787, |
|
"grad_norm": 1.0739299058914185, |
|
"learning_rate": 1.6777424871732227e-05, |
|
"loss": 0.6213, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.6654614745688181, |
|
"grad_norm": 0.6242460608482361, |
|
"learning_rate": 1.6728560957732715e-05, |
|
"loss": 0.6247, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.6664386573508575, |
|
"grad_norm": 0.674392580986023, |
|
"learning_rate": 1.6679697043733203e-05, |
|
"loss": 0.6405, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.6674158401328969, |
|
"grad_norm": 0.4114531874656677, |
|
"learning_rate": 1.663083312973369e-05, |
|
"loss": 0.6235, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.6683930229149362, |
|
"grad_norm": 0.5812088847160339, |
|
"learning_rate": 1.658196921573418e-05, |
|
"loss": 0.6175, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.6693702056969756, |
|
"grad_norm": 0.48696669936180115, |
|
"learning_rate": 1.653310530173467e-05, |
|
"loss": 0.6264, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.670347388479015, |
|
"grad_norm": 0.5733768939971924, |
|
"learning_rate": 1.648424138773516e-05, |
|
"loss": 0.6371, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.6713245712610544, |
|
"grad_norm": 0.9609115123748779, |
|
"learning_rate": 1.6435377473735647e-05, |
|
"loss": 0.618, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.6723017540430938, |
|
"grad_norm": 1.226388692855835, |
|
"learning_rate": 1.6386513559736135e-05, |
|
"loss": 0.6499, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.6732789368251332, |
|
"grad_norm": 0.6776556372642517, |
|
"learning_rate": 1.6337649645736626e-05, |
|
"loss": 0.6356, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.6742561196071726, |
|
"grad_norm": 0.6129021644592285, |
|
"learning_rate": 1.6288785731737114e-05, |
|
"loss": 0.6133, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.6752333023892119, |
|
"grad_norm": 1.4161570072174072, |
|
"learning_rate": 1.6239921817737602e-05, |
|
"loss": 0.6419, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.6762104851712513, |
|
"grad_norm": 0.5857706665992737, |
|
"learning_rate": 1.619105790373809e-05, |
|
"loss": 0.6227, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.6771876679532907, |
|
"grad_norm": 0.933807909488678, |
|
"learning_rate": 1.614219398973858e-05, |
|
"loss": 0.6392, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.6781648507353301, |
|
"grad_norm": 0.9411168098449707, |
|
"learning_rate": 1.609333007573907e-05, |
|
"loss": 0.649, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.6791420335173695, |
|
"grad_norm": 0.5923060178756714, |
|
"learning_rate": 1.6044466161739554e-05, |
|
"loss": 0.6286, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.6801192162994089, |
|
"grad_norm": 0.744339108467102, |
|
"learning_rate": 1.5995602247740043e-05, |
|
"loss": 0.6178, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.6810963990814481, |
|
"grad_norm": 1.0202040672302246, |
|
"learning_rate": 1.5946738333740534e-05, |
|
"loss": 0.6254, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.6820735818634875, |
|
"grad_norm": 0.8653994798660278, |
|
"learning_rate": 1.5897874419741022e-05, |
|
"loss": 0.6214, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.6830507646455269, |
|
"grad_norm": 0.4566790461540222, |
|
"learning_rate": 1.584901050574151e-05, |
|
"loss": 0.6517, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.6840279474275663, |
|
"grad_norm": 0.9629371166229248, |
|
"learning_rate": 1.5800146591741998e-05, |
|
"loss": 0.6359, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.6850051302096057, |
|
"grad_norm": 0.7253994941711426, |
|
"learning_rate": 1.5751282677742486e-05, |
|
"loss": 0.6405, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.6859823129916451, |
|
"grad_norm": 0.8287329077720642, |
|
"learning_rate": 1.5702418763742978e-05, |
|
"loss": 0.6085, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.6869594957736844, |
|
"grad_norm": 0.5002869367599487, |
|
"learning_rate": 1.5653554849743466e-05, |
|
"loss": 0.6255, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.6879366785557238, |
|
"grad_norm": 0.4376012682914734, |
|
"learning_rate": 1.5604690935743954e-05, |
|
"loss": 0.5933, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.6889138613377632, |
|
"grad_norm": 0.756737232208252, |
|
"learning_rate": 1.5555827021744442e-05, |
|
"loss": 0.609, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.6898910441198026, |
|
"grad_norm": 1.1462029218673706, |
|
"learning_rate": 1.5506963107744933e-05, |
|
"loss": 0.6349, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.690868226901842, |
|
"grad_norm": 0.5806009769439697, |
|
"learning_rate": 1.545809919374542e-05, |
|
"loss": 0.6242, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.6918454096838814, |
|
"grad_norm": 0.41798803210258484, |
|
"learning_rate": 1.540923527974591e-05, |
|
"loss": 0.6688, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.6928225924659207, |
|
"grad_norm": 0.5598849058151245, |
|
"learning_rate": 1.5360371365746398e-05, |
|
"loss": 0.6371, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.6937997752479601, |
|
"grad_norm": 1.0417990684509277, |
|
"learning_rate": 1.5311507451746886e-05, |
|
"loss": 0.5966, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.6947769580299995, |
|
"grad_norm": 0.5547340512275696, |
|
"learning_rate": 1.5262643537747374e-05, |
|
"loss": 0.6221, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.6957541408120389, |
|
"grad_norm": 0.4499816298484802, |
|
"learning_rate": 1.5213779623747862e-05, |
|
"loss": 0.6194, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.6967313235940783, |
|
"grad_norm": 2.521627902984619, |
|
"learning_rate": 1.5164915709748351e-05, |
|
"loss": 0.6279, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.6977085063761177, |
|
"grad_norm": 1.0940284729003906, |
|
"learning_rate": 1.511605179574884e-05, |
|
"loss": 0.6376, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.698685689158157, |
|
"grad_norm": 0.515785276889801, |
|
"learning_rate": 1.5067187881749328e-05, |
|
"loss": 0.6046, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.6996628719401964, |
|
"grad_norm": 0.5034206509590149, |
|
"learning_rate": 1.5018323967749817e-05, |
|
"loss": 0.6036, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.7006400547222358, |
|
"grad_norm": 0.6637565493583679, |
|
"learning_rate": 1.4969460053750305e-05, |
|
"loss": 0.6288, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.7016172375042752, |
|
"grad_norm": 0.7677326202392578, |
|
"learning_rate": 1.4920596139750795e-05, |
|
"loss": 0.655, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.7025944202863146, |
|
"grad_norm": 0.6796774864196777, |
|
"learning_rate": 1.4871732225751283e-05, |
|
"loss": 0.5955, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.703571603068354, |
|
"grad_norm": 0.9217430353164673, |
|
"learning_rate": 1.4822868311751773e-05, |
|
"loss": 0.6268, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.7045487858503933, |
|
"grad_norm": 0.846118688583374, |
|
"learning_rate": 1.4774004397752261e-05, |
|
"loss": 0.6345, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.7055259686324327, |
|
"grad_norm": 0.7406280040740967, |
|
"learning_rate": 1.472514048375275e-05, |
|
"loss": 0.631, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.7065031514144721, |
|
"grad_norm": 0.8265899419784546, |
|
"learning_rate": 1.4676276569753239e-05, |
|
"loss": 0.6135, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.7074803341965115, |
|
"grad_norm": 0.7813581228256226, |
|
"learning_rate": 1.4627412655753727e-05, |
|
"loss": 0.6448, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.7084575169785509, |
|
"grad_norm": 0.4718623757362366, |
|
"learning_rate": 1.4578548741754217e-05, |
|
"loss": 0.5952, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.7094346997605903, |
|
"grad_norm": 2.193324565887451, |
|
"learning_rate": 1.4529684827754703e-05, |
|
"loss": 0.6199, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.7104118825426295, |
|
"grad_norm": 1.0357561111450195, |
|
"learning_rate": 1.4480820913755191e-05, |
|
"loss": 0.6342, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.711389065324669, |
|
"grad_norm": 1.0319572687149048, |
|
"learning_rate": 1.4431956999755681e-05, |
|
"loss": 0.5836, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.7123662481067083, |
|
"grad_norm": 1.0852116346359253, |
|
"learning_rate": 1.4383093085756169e-05, |
|
"loss": 0.6246, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.7133434308887477, |
|
"grad_norm": 0.5591370463371277, |
|
"learning_rate": 1.4334229171756659e-05, |
|
"loss": 0.6022, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.7143206136707871, |
|
"grad_norm": 1.129408836364746, |
|
"learning_rate": 1.4285365257757147e-05, |
|
"loss": 0.6414, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.7152977964528265, |
|
"grad_norm": 0.9241653680801392, |
|
"learning_rate": 1.4236501343757635e-05, |
|
"loss": 0.5954, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.7162749792348658, |
|
"grad_norm": 0.5140904188156128, |
|
"learning_rate": 1.4187637429758125e-05, |
|
"loss": 0.6499, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.7172521620169052, |
|
"grad_norm": 0.8134740591049194, |
|
"learning_rate": 1.4138773515758613e-05, |
|
"loss": 0.6199, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.7182293447989446, |
|
"grad_norm": 0.8259909749031067, |
|
"learning_rate": 1.4089909601759102e-05, |
|
"loss": 0.6181, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.719206527580984, |
|
"grad_norm": 0.7081485390663147, |
|
"learning_rate": 1.404104568775959e-05, |
|
"loss": 0.6056, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.7201837103630234, |
|
"grad_norm": 0.7906745076179504, |
|
"learning_rate": 1.399218177376008e-05, |
|
"loss": 0.6341, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.7211608931450628, |
|
"grad_norm": 0.5661380290985107, |
|
"learning_rate": 1.3943317859760568e-05, |
|
"loss": 0.621, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.7221380759271022, |
|
"grad_norm": 1.0971596240997314, |
|
"learning_rate": 1.3894453945761058e-05, |
|
"loss": 0.6261, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.7231152587091415, |
|
"grad_norm": 1.6842643022537231, |
|
"learning_rate": 1.3845590031761546e-05, |
|
"loss": 0.6065, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.7240924414911809, |
|
"grad_norm": 1.0033600330352783, |
|
"learning_rate": 1.3796726117762033e-05, |
|
"loss": 0.6364, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.7250696242732203, |
|
"grad_norm": 0.8704243898391724, |
|
"learning_rate": 1.374786220376252e-05, |
|
"loss": 0.6259, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.7260468070552597, |
|
"grad_norm": 0.855398416519165, |
|
"learning_rate": 1.369899828976301e-05, |
|
"loss": 0.653, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.7270239898372991, |
|
"grad_norm": 1.733904480934143, |
|
"learning_rate": 1.3650134375763498e-05, |
|
"loss": 0.6284, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.7280011726193385, |
|
"grad_norm": 0.49585819244384766, |
|
"learning_rate": 1.3601270461763988e-05, |
|
"loss": 0.6165, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.7289783554013778, |
|
"grad_norm": 0.5818326473236084, |
|
"learning_rate": 1.3552406547764476e-05, |
|
"loss": 0.6403, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.7299555381834172, |
|
"grad_norm": 0.8778244853019714, |
|
"learning_rate": 1.3503542633764964e-05, |
|
"loss": 0.5963, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.7309327209654566, |
|
"grad_norm": 0.6378918290138245, |
|
"learning_rate": 1.3454678719765454e-05, |
|
"loss": 0.6242, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.731909903747496, |
|
"grad_norm": 0.792775571346283, |
|
"learning_rate": 1.3405814805765942e-05, |
|
"loss": 0.6348, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.7328870865295354, |
|
"grad_norm": 0.8906835317611694, |
|
"learning_rate": 1.3356950891766432e-05, |
|
"loss": 0.6074, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.7338642693115748, |
|
"grad_norm": 0.7266893982887268, |
|
"learning_rate": 1.330808697776692e-05, |
|
"loss": 0.6253, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.7348414520936141, |
|
"grad_norm": 0.6896129250526428, |
|
"learning_rate": 1.325922306376741e-05, |
|
"loss": 0.6273, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.7358186348756535, |
|
"grad_norm": 1.0812867879867554, |
|
"learning_rate": 1.3210359149767898e-05, |
|
"loss": 0.6474, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.7367958176576929, |
|
"grad_norm": 0.6664975881576538, |
|
"learning_rate": 1.3161495235768388e-05, |
|
"loss": 0.6114, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.7377730004397323, |
|
"grad_norm": 0.6565041542053223, |
|
"learning_rate": 1.3112631321768876e-05, |
|
"loss": 0.6059, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.7387501832217717, |
|
"grad_norm": 0.5191747546195984, |
|
"learning_rate": 1.3063767407769362e-05, |
|
"loss": 0.6, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.7397273660038111, |
|
"grad_norm": 0.9525347948074341, |
|
"learning_rate": 1.301490349376985e-05, |
|
"loss": 0.6032, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.7407045487858503, |
|
"grad_norm": 1.1167237758636475, |
|
"learning_rate": 1.296603957977034e-05, |
|
"loss": 0.6095, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.7416817315678897, |
|
"grad_norm": 0.8300033807754517, |
|
"learning_rate": 1.2917175665770828e-05, |
|
"loss": 0.6246, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.7426589143499291, |
|
"grad_norm": 0.7098196148872375, |
|
"learning_rate": 1.2868311751771318e-05, |
|
"loss": 0.6188, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.7436360971319685, |
|
"grad_norm": 0.42002958059310913, |
|
"learning_rate": 1.2819447837771806e-05, |
|
"loss": 0.5943, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.7446132799140079, |
|
"grad_norm": 0.7477664947509766, |
|
"learning_rate": 1.2770583923772295e-05, |
|
"loss": 0.6368, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.7455904626960473, |
|
"grad_norm": 1.2381956577301025, |
|
"learning_rate": 1.2721720009772783e-05, |
|
"loss": 0.6528, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.7465676454780866, |
|
"grad_norm": 0.46650367975234985, |
|
"learning_rate": 1.2672856095773272e-05, |
|
"loss": 0.6062, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.747544828260126, |
|
"grad_norm": 0.9223760366439819, |
|
"learning_rate": 1.2623992181773761e-05, |
|
"loss": 0.6386, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.7485220110421654, |
|
"grad_norm": 0.6782642602920532, |
|
"learning_rate": 1.257512826777425e-05, |
|
"loss": 0.5926, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.7494991938242048, |
|
"grad_norm": 0.8533148765563965, |
|
"learning_rate": 1.2526264353774739e-05, |
|
"loss": 0.6076, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.7504763766062442, |
|
"grad_norm": 0.6998764276504517, |
|
"learning_rate": 1.2477400439775225e-05, |
|
"loss": 0.6136, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.7514535593882836, |
|
"grad_norm": 0.4632514715194702, |
|
"learning_rate": 1.2428536525775715e-05, |
|
"loss": 0.6174, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.7524307421703229, |
|
"grad_norm": 0.6624991297721863, |
|
"learning_rate": 1.2379672611776203e-05, |
|
"loss": 0.6053, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.7534079249523623, |
|
"grad_norm": 0.8521330952644348, |
|
"learning_rate": 1.2330808697776693e-05, |
|
"loss": 0.6261, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.7543851077344017, |
|
"grad_norm": 0.6917625665664673, |
|
"learning_rate": 1.2281944783777181e-05, |
|
"loss": 0.6049, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.7553622905164411, |
|
"grad_norm": 0.4985372722148895, |
|
"learning_rate": 1.2233080869777671e-05, |
|
"loss": 0.6057, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.7563394732984805, |
|
"grad_norm": 0.6484245657920837, |
|
"learning_rate": 1.2184216955778159e-05, |
|
"loss": 0.602, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.7573166560805199, |
|
"grad_norm": 0.7993507981300354, |
|
"learning_rate": 1.2135353041778647e-05, |
|
"loss": 0.5809, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 0.6944275498390198, |
|
"learning_rate": 1.2086489127779135e-05, |
|
"loss": 0.5959, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.7592710216445986, |
|
"grad_norm": 0.6688080430030823, |
|
"learning_rate": 1.2037625213779625e-05, |
|
"loss": 0.6038, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.760248204426638, |
|
"grad_norm": 0.8234009742736816, |
|
"learning_rate": 1.1988761299780113e-05, |
|
"loss": 0.6287, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.7612253872086774, |
|
"grad_norm": 1.0987696647644043, |
|
"learning_rate": 1.1939897385780601e-05, |
|
"loss": 0.631, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.7622025699907168, |
|
"grad_norm": 0.7760794758796692, |
|
"learning_rate": 1.189103347178109e-05, |
|
"loss": 0.6356, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.7631797527727562, |
|
"grad_norm": 1.422297716140747, |
|
"learning_rate": 1.1842169557781579e-05, |
|
"loss": 0.5983, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.7641569355547956, |
|
"grad_norm": 0.7743082046508789, |
|
"learning_rate": 1.1793305643782067e-05, |
|
"loss": 0.6132, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.7651341183368349, |
|
"grad_norm": 1.0263071060180664, |
|
"learning_rate": 1.1744441729782555e-05, |
|
"loss": 0.6364, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.7661113011188743, |
|
"grad_norm": 0.49797773361206055, |
|
"learning_rate": 1.1695577815783045e-05, |
|
"loss": 0.6384, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.7670884839009137, |
|
"grad_norm": 0.58949214220047, |
|
"learning_rate": 1.1646713901783533e-05, |
|
"loss": 0.6176, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.7680656666829531, |
|
"grad_norm": 0.8523328304290771, |
|
"learning_rate": 1.1597849987784022e-05, |
|
"loss": 0.6238, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.7690428494649925, |
|
"grad_norm": 2.231853723526001, |
|
"learning_rate": 1.154898607378451e-05, |
|
"loss": 0.6553, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.7700200322470319, |
|
"grad_norm": 0.7179421782493591, |
|
"learning_rate": 1.1500122159785e-05, |
|
"loss": 0.6222, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.7709972150290711, |
|
"grad_norm": 0.7334624528884888, |
|
"learning_rate": 1.1451258245785488e-05, |
|
"loss": 0.6513, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.7719743978111105, |
|
"grad_norm": 0.8650888204574585, |
|
"learning_rate": 1.1402394331785976e-05, |
|
"loss": 0.6382, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.77295158059315, |
|
"grad_norm": 1.277421474456787, |
|
"learning_rate": 1.1353530417786465e-05, |
|
"loss": 0.6032, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.7739287633751893, |
|
"grad_norm": 0.4764556288719177, |
|
"learning_rate": 1.1304666503786954e-05, |
|
"loss": 0.5852, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.7749059461572287, |
|
"grad_norm": 0.7180933952331543, |
|
"learning_rate": 1.1255802589787442e-05, |
|
"loss": 0.6271, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.7758831289392681, |
|
"grad_norm": 0.6978940367698669, |
|
"learning_rate": 1.1206938675787932e-05, |
|
"loss": 0.6252, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.7768603117213074, |
|
"grad_norm": 0.9205247759819031, |
|
"learning_rate": 1.115807476178842e-05, |
|
"loss": 0.6227, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.7778374945033468, |
|
"grad_norm": 0.6126120686531067, |
|
"learning_rate": 1.1109210847788908e-05, |
|
"loss": 0.6164, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.7788146772853862, |
|
"grad_norm": 0.660234808921814, |
|
"learning_rate": 1.1060346933789396e-05, |
|
"loss": 0.6336, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.7797918600674256, |
|
"grad_norm": 0.5239884257316589, |
|
"learning_rate": 1.1011483019789886e-05, |
|
"loss": 0.6324, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.780769042849465, |
|
"grad_norm": 0.6763221621513367, |
|
"learning_rate": 1.0962619105790374e-05, |
|
"loss": 0.6063, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.7817462256315044, |
|
"grad_norm": 0.6201728582382202, |
|
"learning_rate": 1.0913755191790862e-05, |
|
"loss": 0.6168, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.7827234084135437, |
|
"grad_norm": 0.8859091997146606, |
|
"learning_rate": 1.0864891277791352e-05, |
|
"loss": 0.593, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.7837005911955831, |
|
"grad_norm": 0.7334877848625183, |
|
"learning_rate": 1.081602736379184e-05, |
|
"loss": 0.6225, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.7846777739776225, |
|
"grad_norm": 0.49573615193367004, |
|
"learning_rate": 1.076716344979233e-05, |
|
"loss": 0.6007, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.7856549567596619, |
|
"grad_norm": 1.1509833335876465, |
|
"learning_rate": 1.0718299535792818e-05, |
|
"loss": 0.587, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.7866321395417013, |
|
"grad_norm": 0.6591099500656128, |
|
"learning_rate": 1.0669435621793306e-05, |
|
"loss": 0.6462, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.7876093223237407, |
|
"grad_norm": 0.7265052199363708, |
|
"learning_rate": 1.0620571707793794e-05, |
|
"loss": 0.6183, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.78858650510578, |
|
"grad_norm": 1.2156593799591064, |
|
"learning_rate": 1.0571707793794284e-05, |
|
"loss": 0.5811, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.7895636878878194, |
|
"grad_norm": 0.960753858089447, |
|
"learning_rate": 1.0522843879794772e-05, |
|
"loss": 0.6054, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.7905408706698588, |
|
"grad_norm": 1.5062034130096436, |
|
"learning_rate": 1.0473979965795262e-05, |
|
"loss": 0.599, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.7915180534518982, |
|
"grad_norm": 0.7047529816627502, |
|
"learning_rate": 1.042511605179575e-05, |
|
"loss": 0.6149, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.7924952362339376, |
|
"grad_norm": 0.4432947337627411, |
|
"learning_rate": 1.037625213779624e-05, |
|
"loss": 0.6182, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.793472419015977, |
|
"grad_norm": 0.6442515850067139, |
|
"learning_rate": 1.0327388223796726e-05, |
|
"loss": 0.5864, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.7944496017980163, |
|
"grad_norm": 1.2354743480682373, |
|
"learning_rate": 1.0278524309797215e-05, |
|
"loss": 0.6068, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.7954267845800557, |
|
"grad_norm": 0.7862667441368103, |
|
"learning_rate": 1.0229660395797704e-05, |
|
"loss": 0.6072, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.7964039673620951, |
|
"grad_norm": 0.5142656564712524, |
|
"learning_rate": 1.0180796481798192e-05, |
|
"loss": 0.6009, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.7973811501441345, |
|
"grad_norm": 0.8478522300720215, |
|
"learning_rate": 1.0131932567798681e-05, |
|
"loss": 0.5979, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.7983583329261739, |
|
"grad_norm": 0.5929884910583496, |
|
"learning_rate": 1.008306865379917e-05, |
|
"loss": 0.6076, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.7993355157082133, |
|
"grad_norm": 0.8067489862442017, |
|
"learning_rate": 1.003420473979966e-05, |
|
"loss": 0.6123, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.8003126984902525, |
|
"grad_norm": 1.3287664651870728, |
|
"learning_rate": 9.985340825800147e-06, |
|
"loss": 0.6151, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.801289881272292, |
|
"grad_norm": 0.7158493995666504, |
|
"learning_rate": 9.936476911800635e-06, |
|
"loss": 0.5906, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.8022670640543313, |
|
"grad_norm": 0.7307409644126892, |
|
"learning_rate": 9.887612997801123e-06, |
|
"loss": 0.6165, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.8032442468363707, |
|
"grad_norm": 0.6903741359710693, |
|
"learning_rate": 9.838749083801613e-06, |
|
"loss": 0.6175, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.8042214296184101, |
|
"grad_norm": 0.7754660248756409, |
|
"learning_rate": 9.789885169802101e-06, |
|
"loss": 0.6349, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.8051986124004495, |
|
"grad_norm": 0.7808040976524353, |
|
"learning_rate": 9.741021255802591e-06, |
|
"loss": 0.5909, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.8061757951824888, |
|
"grad_norm": 0.8575007915496826, |
|
"learning_rate": 9.692157341803079e-06, |
|
"loss": 0.5861, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.8071529779645282, |
|
"grad_norm": 1.18577241897583, |
|
"learning_rate": 9.643293427803569e-06, |
|
"loss": 0.6137, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.8081301607465676, |
|
"grad_norm": 0.7913909554481506, |
|
"learning_rate": 9.594429513804057e-06, |
|
"loss": 0.6077, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.809107343528607, |
|
"grad_norm": 0.8221011161804199, |
|
"learning_rate": 9.545565599804545e-06, |
|
"loss": 0.5946, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.8100845263106464, |
|
"grad_norm": 0.7047521471977234, |
|
"learning_rate": 9.496701685805033e-06, |
|
"loss": 0.5973, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.8110617090926858, |
|
"grad_norm": 0.5717597007751465, |
|
"learning_rate": 9.447837771805523e-06, |
|
"loss": 0.6236, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.8120388918747252, |
|
"grad_norm": 0.93315190076828, |
|
"learning_rate": 9.39897385780601e-06, |
|
"loss": 0.6335, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.8130160746567645, |
|
"grad_norm": 0.7691722512245178, |
|
"learning_rate": 9.350109943806499e-06, |
|
"loss": 0.5986, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.8139932574388039, |
|
"grad_norm": 0.8947746157646179, |
|
"learning_rate": 9.301246029806989e-06, |
|
"loss": 0.5995, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.8149704402208433, |
|
"grad_norm": 0.8654600381851196, |
|
"learning_rate": 9.252382115807477e-06, |
|
"loss": 0.5844, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.8159476230028827, |
|
"grad_norm": 0.6563751697540283, |
|
"learning_rate": 9.203518201807965e-06, |
|
"loss": 0.588, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.8169248057849221, |
|
"grad_norm": 0.756237804889679, |
|
"learning_rate": 9.154654287808453e-06, |
|
"loss": 0.5814, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.8179019885669615, |
|
"grad_norm": 1.106650948524475, |
|
"learning_rate": 9.105790373808943e-06, |
|
"loss": 0.5924, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.8188791713490008, |
|
"grad_norm": 0.39193272590637207, |
|
"learning_rate": 9.05692645980943e-06, |
|
"loss": 0.6048, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.8198563541310402, |
|
"grad_norm": 0.7022530436515808, |
|
"learning_rate": 9.00806254580992e-06, |
|
"loss": 0.624, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.8208335369130796, |
|
"grad_norm": 0.7286639213562012, |
|
"learning_rate": 8.959198631810408e-06, |
|
"loss": 0.5825, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.821810719695119, |
|
"grad_norm": 0.9062661528587341, |
|
"learning_rate": 8.910334717810898e-06, |
|
"loss": 0.6024, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.8227879024771584, |
|
"grad_norm": 1.0051745176315308, |
|
"learning_rate": 8.861470803811386e-06, |
|
"loss": 0.5881, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.8237650852591978, |
|
"grad_norm": 0.5622514486312866, |
|
"learning_rate": 8.812606889811874e-06, |
|
"loss": 0.625, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 0.80225670337677, |
|
"learning_rate": 8.763742975812362e-06, |
|
"loss": 0.6142, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.8257194508232765, |
|
"grad_norm": 0.7154406905174255, |
|
"learning_rate": 8.714879061812852e-06, |
|
"loss": 0.6009, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.8266966336053159, |
|
"grad_norm": 0.8191014528274536, |
|
"learning_rate": 8.66601514781334e-06, |
|
"loss": 0.6054, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.8276738163873553, |
|
"grad_norm": 1.4982640743255615, |
|
"learning_rate": 8.617151233813828e-06, |
|
"loss": 0.5917, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.8286509991693947, |
|
"grad_norm": 0.6662930250167847, |
|
"learning_rate": 8.568287319814318e-06, |
|
"loss": 0.6047, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.8296281819514341, |
|
"grad_norm": 0.8533642888069153, |
|
"learning_rate": 8.519423405814806e-06, |
|
"loss": 0.6275, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.8306053647334734, |
|
"grad_norm": 1.0405080318450928, |
|
"learning_rate": 8.470559491815294e-06, |
|
"loss": 0.6325, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.8315825475155127, |
|
"grad_norm": 0.3838236629962921, |
|
"learning_rate": 8.421695577815782e-06, |
|
"loss": 0.617, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.8325597302975521, |
|
"grad_norm": 0.7229349613189697, |
|
"learning_rate": 8.372831663816272e-06, |
|
"loss": 0.6095, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.8335369130795915, |
|
"grad_norm": 0.538932204246521, |
|
"learning_rate": 8.32396774981676e-06, |
|
"loss": 0.597, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.834514095861631, |
|
"grad_norm": 0.9081258177757263, |
|
"learning_rate": 8.27510383581725e-06, |
|
"loss": 0.576, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.8354912786436703, |
|
"grad_norm": 1.1647875308990479, |
|
"learning_rate": 8.226239921817738e-06, |
|
"loss": 0.6177, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.8364684614257096, |
|
"grad_norm": 0.5544024705886841, |
|
"learning_rate": 8.177376007818228e-06, |
|
"loss": 0.5944, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.837445644207749, |
|
"grad_norm": 0.49571287631988525, |
|
"learning_rate": 8.128512093818716e-06, |
|
"loss": 0.6417, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.8384228269897884, |
|
"grad_norm": 0.8068299293518066, |
|
"learning_rate": 8.079648179819204e-06, |
|
"loss": 0.6224, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.8394000097718278, |
|
"grad_norm": 0.9682297706604004, |
|
"learning_rate": 8.030784265819692e-06, |
|
"loss": 0.6111, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.8403771925538672, |
|
"grad_norm": 1.051151990890503, |
|
"learning_rate": 7.981920351820182e-06, |
|
"loss": 0.6, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.8413543753359066, |
|
"grad_norm": 0.568880558013916, |
|
"learning_rate": 7.93305643782067e-06, |
|
"loss": 0.6129, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.8423315581179459, |
|
"grad_norm": 0.7681874632835388, |
|
"learning_rate": 7.88419252382116e-06, |
|
"loss": 0.6291, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.8433087408999853, |
|
"grad_norm": 0.7521129250526428, |
|
"learning_rate": 7.835328609821647e-06, |
|
"loss": 0.5983, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.8442859236820247, |
|
"grad_norm": 0.6910899877548218, |
|
"learning_rate": 7.786464695822136e-06, |
|
"loss": 0.6065, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.8452631064640641, |
|
"grad_norm": 1.0774552822113037, |
|
"learning_rate": 7.737600781822624e-06, |
|
"loss": 0.6481, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.8462402892461035, |
|
"grad_norm": 0.5744395852088928, |
|
"learning_rate": 7.688736867823113e-06, |
|
"loss": 0.5881, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.8472174720281429, |
|
"grad_norm": 0.9754884839057922, |
|
"learning_rate": 7.639872953823601e-06, |
|
"loss": 0.6028, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.8481946548101822, |
|
"grad_norm": 0.5664985775947571, |
|
"learning_rate": 7.59100903982409e-06, |
|
"loss": 0.5759, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.8491718375922216, |
|
"grad_norm": 0.7173051238059998, |
|
"learning_rate": 7.542145125824579e-06, |
|
"loss": 0.6038, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.850149020374261, |
|
"grad_norm": 0.5157271027565002, |
|
"learning_rate": 7.493281211825068e-06, |
|
"loss": 0.5872, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.8511262031563004, |
|
"grad_norm": 2.847447156906128, |
|
"learning_rate": 7.444417297825557e-06, |
|
"loss": 0.6008, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.8521033859383398, |
|
"grad_norm": 1.259730577468872, |
|
"learning_rate": 7.395553383826045e-06, |
|
"loss": 0.6047, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.8530805687203792, |
|
"grad_norm": 0.5175238847732544, |
|
"learning_rate": 7.346689469826533e-06, |
|
"loss": 0.6294, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.8540577515024186, |
|
"grad_norm": 0.5168502926826477, |
|
"learning_rate": 7.297825555827022e-06, |
|
"loss": 0.5987, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.8550349342844579, |
|
"grad_norm": 0.7485826015472412, |
|
"learning_rate": 7.24896164182751e-06, |
|
"loss": 0.604, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.8560121170664973, |
|
"grad_norm": 1.2643144130706787, |
|
"learning_rate": 7.200097727827999e-06, |
|
"loss": 0.6271, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.8569892998485367, |
|
"grad_norm": 0.598031222820282, |
|
"learning_rate": 7.151233813828488e-06, |
|
"loss": 0.6201, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.8579664826305761, |
|
"grad_norm": 0.7994399666786194, |
|
"learning_rate": 7.102369899828977e-06, |
|
"loss": 0.6028, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.8589436654126155, |
|
"grad_norm": 0.47928521037101746, |
|
"learning_rate": 7.053505985829466e-06, |
|
"loss": 0.6042, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.8599208481946549, |
|
"grad_norm": 0.6901227831840515, |
|
"learning_rate": 7.004642071829953e-06, |
|
"loss": 0.6289, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.8608980309766942, |
|
"grad_norm": 0.9630447030067444, |
|
"learning_rate": 6.955778157830442e-06, |
|
"loss": 0.6097, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.8618752137587335, |
|
"grad_norm": 0.42696672677993774, |
|
"learning_rate": 6.906914243830931e-06, |
|
"loss": 0.6314, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.862852396540773, |
|
"grad_norm": 0.5964066982269287, |
|
"learning_rate": 6.85805032983142e-06, |
|
"loss": 0.5934, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.8638295793228123, |
|
"grad_norm": 0.5652678608894348, |
|
"learning_rate": 6.809186415831909e-06, |
|
"loss": 0.6032, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.8648067621048517, |
|
"grad_norm": 0.6129952669143677, |
|
"learning_rate": 6.7603225018323976e-06, |
|
"loss": 0.6116, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.8657839448868911, |
|
"grad_norm": 0.5786252021789551, |
|
"learning_rate": 6.7114585878328865e-06, |
|
"loss": 0.6042, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.8667611276689304, |
|
"grad_norm": 0.9830735325813293, |
|
"learning_rate": 6.662594673833375e-06, |
|
"loss": 0.5763, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.8677383104509698, |
|
"grad_norm": 0.7167491316795349, |
|
"learning_rate": 6.613730759833863e-06, |
|
"loss": 0.5774, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.8687154932330092, |
|
"grad_norm": 0.5763813257217407, |
|
"learning_rate": 6.5648668458343515e-06, |
|
"loss": 0.6219, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.8696926760150486, |
|
"grad_norm": 0.552343487739563, |
|
"learning_rate": 6.5160029318348404e-06, |
|
"loss": 0.5983, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.870669858797088, |
|
"grad_norm": 0.6471940279006958, |
|
"learning_rate": 6.4671390178353285e-06, |
|
"loss": 0.616, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.8716470415791274, |
|
"grad_norm": 0.2821710407733917, |
|
"learning_rate": 6.418275103835817e-06, |
|
"loss": 0.6093, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.8726242243611667, |
|
"grad_norm": 0.8784298896789551, |
|
"learning_rate": 6.369411189836306e-06, |
|
"loss": 0.6004, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.8736014071432061, |
|
"grad_norm": 0.5774518847465515, |
|
"learning_rate": 6.320547275836795e-06, |
|
"loss": 0.6177, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.8745785899252455, |
|
"grad_norm": 2.489976406097412, |
|
"learning_rate": 6.2716833618372825e-06, |
|
"loss": 0.6294, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.8755557727072849, |
|
"grad_norm": 0.8063492774963379, |
|
"learning_rate": 6.222819447837772e-06, |
|
"loss": 0.5815, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.8765329554893243, |
|
"grad_norm": 0.9328792095184326, |
|
"learning_rate": 6.17395553383826e-06, |
|
"loss": 0.5709, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.8775101382713637, |
|
"grad_norm": 1.1980705261230469, |
|
"learning_rate": 6.125091619838749e-06, |
|
"loss": 0.5916, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.878487321053403, |
|
"grad_norm": 0.9140294194221497, |
|
"learning_rate": 6.076227705839238e-06, |
|
"loss": 0.5975, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.8794645038354424, |
|
"grad_norm": 0.42323464155197144, |
|
"learning_rate": 6.027363791839727e-06, |
|
"loss": 0.5908, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.8804416866174818, |
|
"grad_norm": 0.8265115022659302, |
|
"learning_rate": 5.978499877840215e-06, |
|
"loss": 0.6236, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.8814188693995212, |
|
"grad_norm": 0.6848395466804504, |
|
"learning_rate": 5.929635963840704e-06, |
|
"loss": 0.6081, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.8823960521815606, |
|
"grad_norm": 0.8593265414237976, |
|
"learning_rate": 5.880772049841193e-06, |
|
"loss": 0.5926, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.8833732349636, |
|
"grad_norm": 0.9084621667861938, |
|
"learning_rate": 5.831908135841682e-06, |
|
"loss": 0.5795, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.8843504177456393, |
|
"grad_norm": 0.5158432126045227, |
|
"learning_rate": 5.78304422184217e-06, |
|
"loss": 0.5887, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.8853276005276787, |
|
"grad_norm": 0.9710085988044739, |
|
"learning_rate": 5.734180307842659e-06, |
|
"loss": 0.5888, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.8863047833097181, |
|
"grad_norm": 0.4963410794734955, |
|
"learning_rate": 5.685316393843147e-06, |
|
"loss": 0.5981, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.8872819660917575, |
|
"grad_norm": 0.39078134298324585, |
|
"learning_rate": 5.636452479843636e-06, |
|
"loss": 0.5991, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.8882591488737969, |
|
"grad_norm": 0.5350062847137451, |
|
"learning_rate": 5.587588565844124e-06, |
|
"loss": 0.5887, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.8892363316558363, |
|
"grad_norm": 0.6059613823890686, |
|
"learning_rate": 5.538724651844613e-06, |
|
"loss": 0.6072, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.8902135144378756, |
|
"grad_norm": 0.4223475158214569, |
|
"learning_rate": 5.489860737845102e-06, |
|
"loss": 0.5866, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.891190697219915, |
|
"grad_norm": 0.8053774237632751, |
|
"learning_rate": 5.44099682384559e-06, |
|
"loss": 0.6031, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.8921678800019543, |
|
"grad_norm": 0.8851518034934998, |
|
"learning_rate": 5.392132909846079e-06, |
|
"loss": 0.5766, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.8931450627839937, |
|
"grad_norm": 0.6842949986457825, |
|
"learning_rate": 5.3432689958465675e-06, |
|
"loss": 0.5593, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.8941222455660331, |
|
"grad_norm": 0.8229865431785583, |
|
"learning_rate": 5.2944050818470564e-06, |
|
"loss": 0.5802, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.8950994283480725, |
|
"grad_norm": 0.7434598207473755, |
|
"learning_rate": 5.2455411678475445e-06, |
|
"loss": 0.6004, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.8960766111301118, |
|
"grad_norm": 0.47747936844825745, |
|
"learning_rate": 5.196677253848033e-06, |
|
"loss": 0.5937, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.8970537939121512, |
|
"grad_norm": 0.7917630076408386, |
|
"learning_rate": 5.147813339848522e-06, |
|
"loss": 0.6119, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.8980309766941906, |
|
"grad_norm": 0.8409056663513184, |
|
"learning_rate": 5.098949425849011e-06, |
|
"loss": 0.6004, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.89900815947623, |
|
"grad_norm": 0.5597165822982788, |
|
"learning_rate": 5.050085511849499e-06, |
|
"loss": 0.6076, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.8999853422582694, |
|
"grad_norm": 0.5740428566932678, |
|
"learning_rate": 5.001221597849988e-06, |
|
"loss": 0.5925, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.9009625250403088, |
|
"grad_norm": 0.739456832408905, |
|
"learning_rate": 4.952357683850477e-06, |
|
"loss": 0.5945, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.9019397078223482, |
|
"grad_norm": 0.5648947954177856, |
|
"learning_rate": 4.903493769850965e-06, |
|
"loss": 0.5712, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.9029168906043875, |
|
"grad_norm": 0.5736894607543945, |
|
"learning_rate": 4.854629855851454e-06, |
|
"loss": 0.6111, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.9038940733864269, |
|
"grad_norm": 0.7701774835586548, |
|
"learning_rate": 4.805765941851942e-06, |
|
"loss": 0.599, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.9048712561684663, |
|
"grad_norm": 0.7485201358795166, |
|
"learning_rate": 4.756902027852431e-06, |
|
"loss": 0.5842, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.9058484389505057, |
|
"grad_norm": 0.6121499538421631, |
|
"learning_rate": 4.70803811385292e-06, |
|
"loss": 0.6198, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.9068256217325451, |
|
"grad_norm": 0.7362948656082153, |
|
"learning_rate": 4.659174199853408e-06, |
|
"loss": 0.6123, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.9078028045145845, |
|
"grad_norm": 0.606191098690033, |
|
"learning_rate": 4.610310285853897e-06, |
|
"loss": 0.6028, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.9087799872966238, |
|
"grad_norm": 0.6618565917015076, |
|
"learning_rate": 4.561446371854386e-06, |
|
"loss": 0.5963, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.9097571700786632, |
|
"grad_norm": 1.5052400827407837, |
|
"learning_rate": 4.512582457854874e-06, |
|
"loss": 0.603, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.9107343528607026, |
|
"grad_norm": 0.8985777497291565, |
|
"learning_rate": 4.463718543855363e-06, |
|
"loss": 0.6156, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.911711535642742, |
|
"grad_norm": 0.8037851452827454, |
|
"learning_rate": 4.414854629855852e-06, |
|
"loss": 0.6406, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.9126887184247814, |
|
"grad_norm": 0.49996376037597656, |
|
"learning_rate": 4.365990715856341e-06, |
|
"loss": 0.6139, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.9136659012068208, |
|
"grad_norm": 0.8254772424697876, |
|
"learning_rate": 4.317126801856829e-06, |
|
"loss": 0.6149, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.9146430839888601, |
|
"grad_norm": 0.7700937390327454, |
|
"learning_rate": 4.268262887857318e-06, |
|
"loss": 0.5993, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.9156202667708995, |
|
"grad_norm": 0.38511478900909424, |
|
"learning_rate": 4.2193989738578065e-06, |
|
"loss": 0.6232, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.9165974495529389, |
|
"grad_norm": 0.6567879319190979, |
|
"learning_rate": 4.1705350598582955e-06, |
|
"loss": 0.5813, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.9175746323349783, |
|
"grad_norm": 0.8876736760139465, |
|
"learning_rate": 4.1216711458587835e-06, |
|
"loss": 0.5938, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.9185518151170177, |
|
"grad_norm": 0.41622501611709595, |
|
"learning_rate": 4.0728072318592724e-06, |
|
"loss": 0.579, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.9195289978990571, |
|
"grad_norm": 0.7455472946166992, |
|
"learning_rate": 4.0239433178597605e-06, |
|
"loss": 0.6011, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.9205061806810964, |
|
"grad_norm": 0.5976389646530151, |
|
"learning_rate": 3.975079403860249e-06, |
|
"loss": 0.6143, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.9214833634631358, |
|
"grad_norm": 0.7773202657699585, |
|
"learning_rate": 3.9262154898607375e-06, |
|
"loss": 0.5796, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.9224605462451752, |
|
"grad_norm": 0.5033147931098938, |
|
"learning_rate": 3.877351575861226e-06, |
|
"loss": 0.5994, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.9234377290272145, |
|
"grad_norm": 0.7234833240509033, |
|
"learning_rate": 3.828487661861715e-06, |
|
"loss": 0.6102, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.924414911809254, |
|
"grad_norm": 0.4259088635444641, |
|
"learning_rate": 3.7796237478622038e-06, |
|
"loss": 0.5787, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.9253920945912933, |
|
"grad_norm": 0.43989598751068115, |
|
"learning_rate": 3.7307598338626923e-06, |
|
"loss": 0.5841, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.9263692773733326, |
|
"grad_norm": 0.4430140256881714, |
|
"learning_rate": 3.681895919863181e-06, |
|
"loss": 0.5933, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.927346460155372, |
|
"grad_norm": 0.7848074436187744, |
|
"learning_rate": 3.63303200586367e-06, |
|
"loss": 0.6138, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.9283236429374114, |
|
"grad_norm": 0.8117037415504456, |
|
"learning_rate": 3.584168091864158e-06, |
|
"loss": 0.5917, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.9293008257194508, |
|
"grad_norm": 0.6667145490646362, |
|
"learning_rate": 3.535304177864647e-06, |
|
"loss": 0.5542, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.9302780085014902, |
|
"grad_norm": 0.7902615070343018, |
|
"learning_rate": 3.486440263865136e-06, |
|
"loss": 0.5741, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.9312551912835296, |
|
"grad_norm": 0.7067260146141052, |
|
"learning_rate": 3.4375763498656245e-06, |
|
"loss": 0.5961, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.9322323740655689, |
|
"grad_norm": 2.328338861465454, |
|
"learning_rate": 3.388712435866113e-06, |
|
"loss": 0.5716, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.9332095568476083, |
|
"grad_norm": 1.1518771648406982, |
|
"learning_rate": 3.3398485218666014e-06, |
|
"loss": 0.6306, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.9341867396296477, |
|
"grad_norm": 0.5183611512184143, |
|
"learning_rate": 3.2909846078670904e-06, |
|
"loss": 0.5998, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.9351639224116871, |
|
"grad_norm": 0.6827223300933838, |
|
"learning_rate": 3.2421206938675793e-06, |
|
"loss": 0.5948, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.9361411051937265, |
|
"grad_norm": 0.6556549668312073, |
|
"learning_rate": 3.1932567798680673e-06, |
|
"loss": 0.6014, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.9371182879757659, |
|
"grad_norm": 0.5259923934936523, |
|
"learning_rate": 3.1443928658685562e-06, |
|
"loss": 0.6192, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.9380954707578052, |
|
"grad_norm": 0.6890705823898315, |
|
"learning_rate": 3.095528951869045e-06, |
|
"loss": 0.5922, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.9390726535398446, |
|
"grad_norm": 0.5739189386367798, |
|
"learning_rate": 3.0466650378695336e-06, |
|
"loss": 0.572, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.940049836321884, |
|
"grad_norm": 0.4784778356552124, |
|
"learning_rate": 2.997801123870022e-06, |
|
"loss": 0.5924, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.9410270191039234, |
|
"grad_norm": 0.4622921049594879, |
|
"learning_rate": 2.9489372098705106e-06, |
|
"loss": 0.6223, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.9420042018859628, |
|
"grad_norm": 0.7146719098091125, |
|
"learning_rate": 2.900073295870999e-06, |
|
"loss": 0.589, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.9429813846680022, |
|
"grad_norm": 0.5467257499694824, |
|
"learning_rate": 2.851209381871488e-06, |
|
"loss": 0.6197, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.9439585674500416, |
|
"grad_norm": 0.6875296831130981, |
|
"learning_rate": 2.8023454678719765e-06, |
|
"loss": 0.588, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.9449357502320809, |
|
"grad_norm": 0.8921650052070618, |
|
"learning_rate": 2.7534815538724654e-06, |
|
"loss": 0.6008, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.9459129330141203, |
|
"grad_norm": 0.6401572823524475, |
|
"learning_rate": 2.704617639872954e-06, |
|
"loss": 0.5858, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.9468901157961597, |
|
"grad_norm": 0.7191618084907532, |
|
"learning_rate": 2.655753725873443e-06, |
|
"loss": 0.5763, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 0.6186959147453308, |
|
"learning_rate": 2.6068898118739313e-06, |
|
"loss": 0.5695, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.9488444813602385, |
|
"grad_norm": 0.36472517251968384, |
|
"learning_rate": 2.5580258978744198e-06, |
|
"loss": 0.5819, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.9498216641422779, |
|
"grad_norm": 1.0958882570266724, |
|
"learning_rate": 2.5091619838749083e-06, |
|
"loss": 0.6167, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.9507988469243172, |
|
"grad_norm": 0.7372691631317139, |
|
"learning_rate": 2.460298069875397e-06, |
|
"loss": 0.5936, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.9517760297063566, |
|
"grad_norm": 0.4143502116203308, |
|
"learning_rate": 2.4114341558758857e-06, |
|
"loss": 0.5873, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.952753212488396, |
|
"grad_norm": 1.134059190750122, |
|
"learning_rate": 2.3625702418763746e-06, |
|
"loss": 0.6143, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.9537303952704353, |
|
"grad_norm": 0.40213558077812195, |
|
"learning_rate": 2.313706327876863e-06, |
|
"loss": 0.5725, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.9547075780524747, |
|
"grad_norm": 0.5387831926345825, |
|
"learning_rate": 2.264842413877352e-06, |
|
"loss": 0.5959, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.9556847608345141, |
|
"grad_norm": 0.8288729786872864, |
|
"learning_rate": 2.2159784998778405e-06, |
|
"loss": 0.5881, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.9566619436165534, |
|
"grad_norm": 0.7433648109436035, |
|
"learning_rate": 2.167114585878329e-06, |
|
"loss": 0.5881, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.9576391263985928, |
|
"grad_norm": 0.7633154392242432, |
|
"learning_rate": 2.1182506718788174e-06, |
|
"loss": 0.6218, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.9586163091806322, |
|
"grad_norm": 0.5039961338043213, |
|
"learning_rate": 2.069386757879306e-06, |
|
"loss": 0.5973, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.9595934919626716, |
|
"grad_norm": 0.9047883152961731, |
|
"learning_rate": 2.020522843879795e-06, |
|
"loss": 0.5741, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.960570674744711, |
|
"grad_norm": 0.6591965556144714, |
|
"learning_rate": 1.9716589298802833e-06, |
|
"loss": 0.5914, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.9615478575267504, |
|
"grad_norm": 0.6809371113777161, |
|
"learning_rate": 1.9227950158807722e-06, |
|
"loss": 0.5876, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.9625250403087897, |
|
"grad_norm": 0.5399168133735657, |
|
"learning_rate": 1.8739311018812607e-06, |
|
"loss": 0.5921, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.9635022230908291, |
|
"grad_norm": 0.6308420896530151, |
|
"learning_rate": 1.8250671878817494e-06, |
|
"loss": 0.5805, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.9644794058728685, |
|
"grad_norm": 0.8909119963645935, |
|
"learning_rate": 1.776203273882238e-06, |
|
"loss": 0.6062, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.9654565886549079, |
|
"grad_norm": 0.5217241048812866, |
|
"learning_rate": 1.7273393598827268e-06, |
|
"loss": 0.5866, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.9664337714369473, |
|
"grad_norm": 0.5441256165504456, |
|
"learning_rate": 1.6784754458832153e-06, |
|
"loss": 0.5889, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.9674109542189867, |
|
"grad_norm": 0.6473023891448975, |
|
"learning_rate": 1.629611531883704e-06, |
|
"loss": 0.6066, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.968388137001026, |
|
"grad_norm": 0.7462273836135864, |
|
"learning_rate": 1.5807476178841925e-06, |
|
"loss": 0.5926, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.9693653197830654, |
|
"grad_norm": 0.4794386029243469, |
|
"learning_rate": 1.5318837038846812e-06, |
|
"loss": 0.5856, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.9703425025651048, |
|
"grad_norm": 0.5676984190940857, |
|
"learning_rate": 1.48301978988517e-06, |
|
"loss": 0.5797, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.9713196853471442, |
|
"grad_norm": 0.7232435941696167, |
|
"learning_rate": 1.4341558758856586e-06, |
|
"loss": 0.6122, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.9722968681291836, |
|
"grad_norm": 0.6773326396942139, |
|
"learning_rate": 1.385291961886147e-06, |
|
"loss": 0.5877, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.973274050911223, |
|
"grad_norm": 0.522219717502594, |
|
"learning_rate": 1.3364280478866358e-06, |
|
"loss": 0.5819, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.9742512336932623, |
|
"grad_norm": 0.7057138681411743, |
|
"learning_rate": 1.2875641338871245e-06, |
|
"loss": 0.6047, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.9752284164753017, |
|
"grad_norm": 0.8740668296813965, |
|
"learning_rate": 1.2387002198876132e-06, |
|
"loss": 0.5909, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.9762055992573411, |
|
"grad_norm": 0.6199445128440857, |
|
"learning_rate": 1.1898363058881017e-06, |
|
"loss": 0.5972, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.9771827820393805, |
|
"grad_norm": 0.8061028122901917, |
|
"learning_rate": 1.1409723918885904e-06, |
|
"loss": 0.5958, |
|
"step": 100000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 102335, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.254292317011968e+18, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|