diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,10990 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.0, - "eval_steps": 500, - "global_step": 1564, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0012787723785166241, - "grad_norm": 36.605578307873216, - "learning_rate": 4.2553191489361704e-07, - "loss": 2.1568, - "step": 1 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 33.415295616575754, - "learning_rate": 8.510638297872341e-07, - "loss": 2.0922, - "step": 2 - }, - { - "epoch": 0.0038363171355498722, - "grad_norm": 4.621331731254024, - "learning_rate": 1.276595744680851e-06, - "loss": 0.7744, - "step": 3 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 34.24567017442146, - "learning_rate": 1.7021276595744682e-06, - "loss": 2.1457, - "step": 4 - }, - { - "epoch": 0.00639386189258312, - "grad_norm": 28.40225202328264, - "learning_rate": 2.1276595744680853e-06, - "loss": 1.9419, - "step": 5 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 27.91220502985079, - "learning_rate": 2.553191489361702e-06, - "loss": 1.8848, - "step": 6 - }, - { - "epoch": 0.008951406649616368, - "grad_norm": 12.120985612924882, - "learning_rate": 2.978723404255319e-06, - "loss": 1.7026, - "step": 7 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 9.248899744422705, - "learning_rate": 3.4042553191489363e-06, - "loss": 1.6385, - "step": 8 - }, - { - "epoch": 0.011508951406649617, - "grad_norm": 10.03205428387366, - "learning_rate": 3.8297872340425535e-06, - "loss": 1.5758, - "step": 9 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 8.199053478266668, - "learning_rate": 4.255319148936171e-06, - "loss": 1.6093, - "step": 10 - }, - { - "epoch": 0.014066496163682864, - "grad_norm": 7.363887554049014, - "learning_rate": 4.680851063829788e-06, - "loss": 1.5339, - "step": 11 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 7.648934692635177, - "learning_rate": 5.106382978723404e-06, - "loss": 1.6013, - "step": 12 - }, - { - "epoch": 0.016624040920716114, - "grad_norm": 6.0671393364567825, - "learning_rate": 5.531914893617022e-06, - "loss": 1.4691, - "step": 13 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 4.618106587033753, - "learning_rate": 5.957446808510638e-06, - "loss": 1.3804, - "step": 14 - }, - { - "epoch": 0.01918158567774936, - "grad_norm": 5.3743088674018615, - "learning_rate": 6.382978723404256e-06, - "loss": 1.5038, - "step": 15 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 5.17701153519708, - "learning_rate": 6.808510638297873e-06, - "loss": 0.7805, - "step": 16 - }, - { - "epoch": 0.021739130434782608, - "grad_norm": 4.332240068933078, - "learning_rate": 7.234042553191491e-06, - "loss": 0.7871, - "step": 17 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 6.864354586832835, - "learning_rate": 7.659574468085107e-06, - "loss": 1.5159, - "step": 18 - }, - { - "epoch": 0.024296675191815855, - "grad_norm": 5.8181684094654775, - "learning_rate": 8.085106382978723e-06, - "loss": 1.4676, - "step": 19 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 2.971066162977147, - "learning_rate": 8.510638297872341e-06, - "loss": 0.7714, - "step": 20 - }, - { - "epoch": 0.026854219948849106, - "grad_norm": 4.540941888207125, - "learning_rate": 8.936170212765958e-06, - "loss": 1.4193, - "step": 21 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 4.468046825773981, - "learning_rate": 9.361702127659576e-06, - "loss": 1.5075, - "step": 22 - }, - { - "epoch": 0.029411764705882353, - "grad_norm": 4.20950169727546, - "learning_rate": 9.787234042553192e-06, - "loss": 1.4541, - "step": 23 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 3.9560777357544383, - "learning_rate": 1.0212765957446808e-05, - "loss": 1.3988, - "step": 24 - }, - { - "epoch": 0.0319693094629156, - "grad_norm": 4.25806419518347, - "learning_rate": 1.0638297872340426e-05, - "loss": 1.4733, - "step": 25 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 2.4463184140462895, - "learning_rate": 1.1063829787234044e-05, - "loss": 0.6891, - "step": 26 - }, - { - "epoch": 0.034526854219948847, - "grad_norm": 2.6718314946873507, - "learning_rate": 1.1489361702127662e-05, - "loss": 0.7597, - "step": 27 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 4.0808818610222595, - "learning_rate": 1.1914893617021277e-05, - "loss": 1.3857, - "step": 28 - }, - { - "epoch": 0.0370843989769821, - "grad_norm": 3.7903196620823643, - "learning_rate": 1.2340425531914895e-05, - "loss": 1.4038, - "step": 29 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 4.498782473770312, - "learning_rate": 1.2765957446808513e-05, - "loss": 1.4099, - "step": 30 - }, - { - "epoch": 0.03964194373401535, - "grad_norm": 4.178474999224735, - "learning_rate": 1.3191489361702127e-05, - "loss": 1.4112, - "step": 31 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 3.8978032814811416, - "learning_rate": 1.3617021276595745e-05, - "loss": 1.4374, - "step": 32 - }, - { - "epoch": 0.04219948849104859, - "grad_norm": 3.6931786411454937, - "learning_rate": 1.4042553191489363e-05, - "loss": 1.3439, - "step": 33 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 4.016866698179548, - "learning_rate": 1.4468085106382981e-05, - "loss": 1.3855, - "step": 34 - }, - { - "epoch": 0.04475703324808184, - "grad_norm": 3.6292271537276872, - "learning_rate": 1.4893617021276596e-05, - "loss": 1.3625, - "step": 35 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 2.434286278312962, - "learning_rate": 1.5319148936170214e-05, - "loss": 0.7333, - "step": 36 - }, - { - "epoch": 0.04731457800511509, - "grad_norm": 4.240835520629863, - "learning_rate": 1.5744680851063832e-05, - "loss": 1.3892, - "step": 37 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 2.0580331121790243, - "learning_rate": 1.6170212765957446e-05, - "loss": 0.6586, - "step": 38 - }, - { - "epoch": 0.049872122762148335, - "grad_norm": 4.218725555125208, - "learning_rate": 1.6595744680851064e-05, - "loss": 1.446, - "step": 39 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 3.86226874194165, - "learning_rate": 1.7021276595744682e-05, - "loss": 1.3409, - "step": 40 - }, - { - "epoch": 0.052429667519181586, - "grad_norm": 3.5934569775686676, - "learning_rate": 1.74468085106383e-05, - "loss": 1.4178, - "step": 41 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 3.5587595219501353, - "learning_rate": 1.7872340425531915e-05, - "loss": 1.4387, - "step": 42 - }, - { - "epoch": 0.054987212276214836, - "grad_norm": 3.798121507795569, - "learning_rate": 1.8297872340425533e-05, - "loss": 1.3733, - "step": 43 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 3.538666286727008, - "learning_rate": 1.872340425531915e-05, - "loss": 1.3843, - "step": 44 - }, - { - "epoch": 0.05754475703324808, - "grad_norm": 3.7811908610613143, - "learning_rate": 1.914893617021277e-05, - "loss": 1.363, - "step": 45 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 4.041280888742075, - "learning_rate": 1.9574468085106384e-05, - "loss": 1.399, - "step": 46 - }, - { - "epoch": 0.06010230179028133, - "grad_norm": 3.7726061428584807, - "learning_rate": 2e-05, - "loss": 1.3697, - "step": 47 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 3.4591656652273124, - "learning_rate": 1.999997855636369e-05, - "loss": 1.3832, - "step": 48 - }, - { - "epoch": 0.06265984654731457, - "grad_norm": 2.3983536579705285, - "learning_rate": 1.9999914225546713e-05, - "loss": 0.743, - "step": 49 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 4.050847386567195, - "learning_rate": 1.9999807007824975e-05, - "loss": 1.2832, - "step": 50 - }, - { - "epoch": 0.06521739130434782, - "grad_norm": 3.814166423237622, - "learning_rate": 1.9999656903658296e-05, - "loss": 1.3665, - "step": 51 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 3.9514466209405383, - "learning_rate": 1.999946391369044e-05, - "loss": 1.43, - "step": 52 - }, - { - "epoch": 0.06777493606138107, - "grad_norm": 3.9116736618090546, - "learning_rate": 1.9999228038749084e-05, - "loss": 1.4031, - "step": 53 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 3.9660124720817285, - "learning_rate": 1.999894927984583e-05, - "loss": 1.4487, - "step": 54 - }, - { - "epoch": 0.07033248081841433, - "grad_norm": 3.5563815297426484, - "learning_rate": 1.9998627638176204e-05, - "loss": 1.3957, - "step": 55 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 3.6257054019860506, - "learning_rate": 1.9998263115119635e-05, - "loss": 1.4422, - "step": 56 - }, - { - "epoch": 0.07289002557544758, - "grad_norm": 3.6123148690008904, - "learning_rate": 1.9997855712239462e-05, - "loss": 1.375, - "step": 57 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 3.3537116491461068, - "learning_rate": 1.999740543128293e-05, - "loss": 1.3736, - "step": 58 - }, - { - "epoch": 0.07544757033248081, - "grad_norm": 1.9520763749295138, - "learning_rate": 1.9996912274181164e-05, - "loss": 0.6848, - "step": 59 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 3.8557602551247983, - "learning_rate": 1.999637624304919e-05, - "loss": 1.3589, - "step": 60 - }, - { - "epoch": 0.07800511508951406, - "grad_norm": 2.17241859511162, - "learning_rate": 1.9995797340185888e-05, - "loss": 0.7629, - "step": 61 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 3.867916868142898, - "learning_rate": 1.9995175568074026e-05, - "loss": 1.3916, - "step": 62 - }, - { - "epoch": 0.08056265984654731, - "grad_norm": 3.4945106680709057, - "learning_rate": 1.9994510929380203e-05, - "loss": 1.3948, - "step": 63 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 3.475787716500603, - "learning_rate": 1.9993803426954885e-05, - "loss": 1.4484, - "step": 64 - }, - { - "epoch": 0.08312020460358056, - "grad_norm": 3.2559235768784838, - "learning_rate": 1.9993053063832347e-05, - "loss": 1.4073, - "step": 65 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 3.547666226717069, - "learning_rate": 1.99922598432307e-05, - "loss": 1.4236, - "step": 66 - }, - { - "epoch": 0.08567774936061381, - "grad_norm": 2.456097379586184, - "learning_rate": 1.9991423768551844e-05, - "loss": 0.7556, - "step": 67 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 3.3860710865391757, - "learning_rate": 1.999054484338148e-05, - "loss": 1.3646, - "step": 68 - }, - { - "epoch": 0.08823529411764706, - "grad_norm": 1.9803872832524343, - "learning_rate": 1.9989623071489075e-05, - "loss": 0.7341, - "step": 69 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 3.516622628882788, - "learning_rate": 1.998865845682786e-05, - "loss": 1.4084, - "step": 70 - }, - { - "epoch": 0.0907928388746803, - "grad_norm": 3.3519716388303196, - "learning_rate": 1.99876510035348e-05, - "loss": 1.3886, - "step": 71 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 3.280121432898206, - "learning_rate": 1.9986600715930596e-05, - "loss": 1.424, - "step": 72 - }, - { - "epoch": 0.09335038363171355, - "grad_norm": 3.250383096610705, - "learning_rate": 1.9985507598519636e-05, - "loss": 1.463, - "step": 73 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 3.2089225385297744, - "learning_rate": 1.9984371655990008e-05, - "loss": 1.3883, - "step": 74 - }, - { - "epoch": 0.0959079283887468, - "grad_norm": 3.4193518080682535, - "learning_rate": 1.9983192893213455e-05, - "loss": 1.3934, - "step": 75 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 3.357987716074897, - "learning_rate": 1.998197131524537e-05, - "loss": 1.409, - "step": 76 - }, - { - "epoch": 0.09846547314578005, - "grad_norm": 3.5893554329694664, - "learning_rate": 1.9980706927324776e-05, - "loss": 1.3914, - "step": 77 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 2.4949235242678194, - "learning_rate": 1.9979399734874273e-05, - "loss": 0.7583, - "step": 78 - }, - { - "epoch": 0.1010230179028133, - "grad_norm": 2.1206978230206683, - "learning_rate": 1.9978049743500068e-05, - "loss": 0.6874, - "step": 79 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 2.042593919723555, - "learning_rate": 1.9976656958991896e-05, - "loss": 0.7173, - "step": 80 - }, - { - "epoch": 0.10358056265984655, - "grad_norm": 4.655214810700653, - "learning_rate": 1.9975221387323032e-05, - "loss": 1.421, - "step": 81 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 2.313773154995568, - "learning_rate": 1.9973743034650254e-05, - "loss": 0.7871, - "step": 82 - }, - { - "epoch": 0.10613810741687979, - "grad_norm": 3.7554878071688402, - "learning_rate": 1.9972221907313815e-05, - "loss": 1.442, - "step": 83 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 3.7024516128455907, - "learning_rate": 1.9970658011837404e-05, - "loss": 1.4046, - "step": 84 - }, - { - "epoch": 0.10869565217391304, - "grad_norm": 3.19338979164646, - "learning_rate": 1.9969051354928158e-05, - "loss": 1.3564, - "step": 85 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 3.3120786041046455, - "learning_rate": 1.9967401943476577e-05, - "loss": 1.3837, - "step": 86 - }, - { - "epoch": 0.11125319693094629, - "grad_norm": 3.393693890098968, - "learning_rate": 1.9965709784556545e-05, - "loss": 1.4177, - "step": 87 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 3.7947119515750085, - "learning_rate": 1.9963974885425267e-05, - "loss": 1.4072, - "step": 88 - }, - { - "epoch": 0.11381074168797954, - "grad_norm": 3.5416545514916775, - "learning_rate": 1.996219725352325e-05, - "loss": 1.42, - "step": 89 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 3.189254282123899, - "learning_rate": 1.996037689647428e-05, - "loss": 1.3232, - "step": 90 - }, - { - "epoch": 0.11636828644501279, - "grad_norm": 3.5248394082220975, - "learning_rate": 1.9958513822085365e-05, - "loss": 1.4329, - "step": 91 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 3.524178768713469, - "learning_rate": 1.9956608038346723e-05, - "loss": 1.3306, - "step": 92 - }, - { - "epoch": 0.11892583120204604, - "grad_norm": 3.707781957954962, - "learning_rate": 1.9954659553431743e-05, - "loss": 1.3658, - "step": 93 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 2.36910811987986, - "learning_rate": 1.9952668375696946e-05, - "loss": 0.732, - "step": 94 - }, - { - "epoch": 0.12148337595907928, - "grad_norm": 3.4161545852641777, - "learning_rate": 1.9950634513681946e-05, - "loss": 1.3577, - "step": 95 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 3.619185266185885, - "learning_rate": 1.994855797610943e-05, - "loss": 1.4299, - "step": 96 - }, - { - "epoch": 0.12404092071611253, - "grad_norm": 3.2498071640015667, - "learning_rate": 1.9946438771885096e-05, - "loss": 1.4242, - "step": 97 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 3.165374960129344, - "learning_rate": 1.994427691009763e-05, - "loss": 1.4173, - "step": 98 - }, - { - "epoch": 0.1265984654731458, - "grad_norm": 2.2049060225614783, - "learning_rate": 1.9942072400018675e-05, - "loss": 0.7405, - "step": 99 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 3.3120039709468307, - "learning_rate": 1.9939825251102768e-05, - "loss": 1.4081, - "step": 100 - }, - { - "epoch": 0.12915601023017903, - "grad_norm": 3.235307597478888, - "learning_rate": 1.993753547298732e-05, - "loss": 1.3996, - "step": 101 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 3.466165414706452, - "learning_rate": 1.9935203075492567e-05, - "loss": 1.367, - "step": 102 - }, - { - "epoch": 0.13171355498721227, - "grad_norm": 3.5225068196250167, - "learning_rate": 1.993282806862152e-05, - "loss": 1.4022, - "step": 103 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 3.4637862491477676, - "learning_rate": 1.993041046255994e-05, - "loss": 1.3817, - "step": 104 - }, - { - "epoch": 0.13427109974424553, - "grad_norm": 3.539973642666539, - "learning_rate": 1.992795026767628e-05, - "loss": 1.4105, - "step": 105 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 2.4236348460224306, - "learning_rate": 1.9925447494521642e-05, - "loss": 0.7307, - "step": 106 - }, - { - "epoch": 0.13682864450127877, - "grad_norm": 3.804581343377563, - "learning_rate": 1.9922902153829742e-05, - "loss": 1.408, - "step": 107 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 3.5607156681038856, - "learning_rate": 1.9920314256516845e-05, - "loss": 1.4499, - "step": 108 - }, - { - "epoch": 0.13938618925831203, - "grad_norm": 3.4622089953073036, - "learning_rate": 1.9917683813681744e-05, - "loss": 1.3687, - "step": 109 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 3.3387822336737596, - "learning_rate": 1.991501083660569e-05, - "loss": 1.3892, - "step": 110 - }, - { - "epoch": 0.14194373401534527, - "grad_norm": 3.2562066293118797, - "learning_rate": 1.991229533675235e-05, - "loss": 1.3966, - "step": 111 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 3.3262914004316086, - "learning_rate": 1.990953732576776e-05, - "loss": 1.3175, - "step": 112 - }, - { - "epoch": 0.1445012787723785, - "grad_norm": 2.741251368759412, - "learning_rate": 1.990673681548028e-05, - "loss": 0.7764, - "step": 113 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 3.520834056219435, - "learning_rate": 1.990389381790054e-05, - "loss": 1.3977, - "step": 114 - }, - { - "epoch": 0.14705882352941177, - "grad_norm": 3.7430962585538894, - "learning_rate": 1.990100834522137e-05, - "loss": 1.475, - "step": 115 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 1.9371232490347023, - "learning_rate": 1.9898080409817783e-05, - "loss": 0.7053, - "step": 116 - }, - { - "epoch": 0.149616368286445, - "grad_norm": 3.417664725046512, - "learning_rate": 1.9895110024246893e-05, - "loss": 1.3579, - "step": 117 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 3.3358102718766394, - "learning_rate": 1.9892097201247873e-05, - "loss": 1.445, - "step": 118 - }, - { - "epoch": 0.15217391304347827, - "grad_norm": 2.1449485361885388, - "learning_rate": 1.9889041953741905e-05, - "loss": 0.7452, - "step": 119 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 3.3012728778824996, - "learning_rate": 1.9885944294832104e-05, - "loss": 1.4069, - "step": 120 - }, - { - "epoch": 0.1547314578005115, - "grad_norm": 3.2313285064459376, - "learning_rate": 1.9882804237803487e-05, - "loss": 1.3615, - "step": 121 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 3.2245796334599253, - "learning_rate": 1.9879621796122905e-05, - "loss": 1.447, - "step": 122 - }, - { - "epoch": 0.15728900255754474, - "grad_norm": 3.5505516834841706, - "learning_rate": 1.9876396983438978e-05, - "loss": 1.3173, - "step": 123 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 3.219089437275901, - "learning_rate": 1.987312981358205e-05, - "loss": 1.3481, - "step": 124 - }, - { - "epoch": 0.159846547314578, - "grad_norm": 3.447469263354993, - "learning_rate": 1.9869820300564128e-05, - "loss": 0.703, - "step": 125 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 3.1975802757959686, - "learning_rate": 1.98664684585788e-05, - "loss": 1.4459, - "step": 126 - }, - { - "epoch": 0.16240409207161124, - "grad_norm": 3.5468808522825013, - "learning_rate": 1.9863074302001207e-05, - "loss": 1.3852, - "step": 127 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 3.251768826197522, - "learning_rate": 1.9859637845387962e-05, - "loss": 1.406, - "step": 128 - }, - { - "epoch": 0.1649616368286445, - "grad_norm": 3.2312642727167167, - "learning_rate": 1.9856159103477085e-05, - "loss": 1.3395, - "step": 129 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 3.072815063118375, - "learning_rate": 1.985263809118796e-05, - "loss": 0.7204, - "step": 130 - }, - { - "epoch": 0.16751918158567775, - "grad_norm": 3.4086816063632877, - "learning_rate": 1.984907482362124e-05, - "loss": 1.3717, - "step": 131 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 2.532300514800621, - "learning_rate": 1.984546931605881e-05, - "loss": 0.7808, - "step": 132 - }, - { - "epoch": 0.170076726342711, - "grad_norm": 1.9627829637920826, - "learning_rate": 1.9841821583963716e-05, - "loss": 0.766, - "step": 133 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 4.0860708556725935, - "learning_rate": 1.9838131642980075e-05, - "loss": 1.4183, - "step": 134 - }, - { - "epoch": 0.17263427109974425, - "grad_norm": 3.3084822315158076, - "learning_rate": 1.983439950893304e-05, - "loss": 1.3792, - "step": 135 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 3.3039812630885916, - "learning_rate": 1.9830625197828724e-05, - "loss": 1.3887, - "step": 136 - }, - { - "epoch": 0.17519181585677748, - "grad_norm": 3.2603378144385475, - "learning_rate": 1.9826808725854106e-05, - "loss": 1.3539, - "step": 137 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 3.165228364292122, - "learning_rate": 1.9822950109377005e-05, - "loss": 1.3774, - "step": 138 - }, - { - "epoch": 0.17774936061381075, - "grad_norm": 3.1924653284284634, - "learning_rate": 1.9819049364945967e-05, - "loss": 1.4037, - "step": 139 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 3.3637189225648165, - "learning_rate": 1.9815106509290224e-05, - "loss": 1.3675, - "step": 140 - }, - { - "epoch": 0.18030690537084398, - "grad_norm": 2.794105643638662, - "learning_rate": 1.9811121559319607e-05, - "loss": 0.7725, - "step": 141 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 3.325931165806923, - "learning_rate": 1.9807094532124484e-05, - "loss": 1.3868, - "step": 142 - }, - { - "epoch": 0.18286445012787725, - "grad_norm": 2.021467930397522, - "learning_rate": 1.980302544497567e-05, - "loss": 0.7129, - "step": 143 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 3.175909747750123, - "learning_rate": 1.979891431532437e-05, - "loss": 1.3974, - "step": 144 - }, - { - "epoch": 0.18542199488491048, - "grad_norm": 3.1630797621733184, - "learning_rate": 1.9794761160802102e-05, - "loss": 1.4024, - "step": 145 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 3.3263714749700206, - "learning_rate": 1.9790565999220615e-05, - "loss": 1.4026, - "step": 146 - }, - { - "epoch": 0.18797953964194372, - "grad_norm": 3.2106254059686146, - "learning_rate": 1.9786328848571807e-05, - "loss": 1.4117, - "step": 147 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 3.1034533631735384, - "learning_rate": 1.9782049727027663e-05, - "loss": 1.3485, - "step": 148 - }, - { - "epoch": 0.19053708439897699, - "grad_norm": 3.571655985565674, - "learning_rate": 1.977772865294017e-05, - "loss": 1.3337, - "step": 149 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 3.070396090647803, - "learning_rate": 1.9773365644841236e-05, - "loss": 1.3858, - "step": 150 - }, - { - "epoch": 0.19309462915601022, - "grad_norm": 3.1955519502926513, - "learning_rate": 1.9768960721442614e-05, - "loss": 1.3578, - "step": 151 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 2.69457104686623, - "learning_rate": 1.9764513901635814e-05, - "loss": 0.8301, - "step": 152 - }, - { - "epoch": 0.1956521739130435, - "grad_norm": 3.337515849679424, - "learning_rate": 1.9760025204492038e-05, - "loss": 1.3664, - "step": 153 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 3.4603246359851614, - "learning_rate": 1.9755494649262085e-05, - "loss": 1.4317, - "step": 154 - }, - { - "epoch": 0.19820971867007672, - "grad_norm": 3.137590078228091, - "learning_rate": 1.975092225537626e-05, - "loss": 1.3885, - "step": 155 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 2.7983664861992805, - "learning_rate": 1.974630804244433e-05, - "loss": 1.3358, - "step": 156 - }, - { - "epoch": 0.20076726342711, - "grad_norm": 3.2194067733707996, - "learning_rate": 1.9741652030255386e-05, - "loss": 1.389, - "step": 157 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 2.0993621113237215, - "learning_rate": 1.9736954238777793e-05, - "loss": 0.6953, - "step": 158 - }, - { - "epoch": 0.20332480818414322, - "grad_norm": 3.345426163359473, - "learning_rate": 1.97322146881591e-05, - "loss": 1.4088, - "step": 159 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 3.0774576104993607, - "learning_rate": 1.9727433398725947e-05, - "loss": 1.3856, - "step": 160 - }, - { - "epoch": 0.20588235294117646, - "grad_norm": 3.0584530091799302, - "learning_rate": 1.9722610390983982e-05, - "loss": 1.3842, - "step": 161 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 3.138003015872817, - "learning_rate": 1.9717745685617767e-05, - "loss": 1.4167, - "step": 162 - }, - { - "epoch": 0.20843989769820973, - "grad_norm": 2.1778309460294483, - "learning_rate": 1.97128393034907e-05, - "loss": 0.7621, - "step": 163 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 3.190149854489232, - "learning_rate": 1.970789126564491e-05, - "loss": 1.3806, - "step": 164 - }, - { - "epoch": 0.21099744245524296, - "grad_norm": 3.1493943980321473, - "learning_rate": 1.970290159330119e-05, - "loss": 1.4196, - "step": 165 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 3.035585748755982, - "learning_rate": 1.9697870307858876e-05, - "loss": 1.3406, - "step": 166 - }, - { - "epoch": 0.21355498721227623, - "grad_norm": 2.9909846203669, - "learning_rate": 1.969279743089578e-05, - "loss": 1.3308, - "step": 167 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 2.9300858657785884, - "learning_rate": 1.968768298416809e-05, - "loss": 1.2715, - "step": 168 - }, - { - "epoch": 0.21611253196930946, - "grad_norm": 3.040948079419372, - "learning_rate": 1.9682526989610277e-05, - "loss": 1.3927, - "step": 169 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 1.8502860687489033, - "learning_rate": 1.967732946933499e-05, - "loss": 0.7191, - "step": 170 - }, - { - "epoch": 0.2186700767263427, - "grad_norm": 3.2870679781385816, - "learning_rate": 1.9672090445632975e-05, - "loss": 1.4083, - "step": 171 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 3.268511593883704, - "learning_rate": 1.966680994097298e-05, - "loss": 1.3705, - "step": 172 - }, - { - "epoch": 0.22122762148337596, - "grad_norm": 3.4764897116912294, - "learning_rate": 1.9661487978001648e-05, - "loss": 1.4704, - "step": 173 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 3.0025325315781246, - "learning_rate": 1.9656124579543428e-05, - "loss": 1.3873, - "step": 174 - }, - { - "epoch": 0.2237851662404092, - "grad_norm": 2.9837886205309894, - "learning_rate": 1.9650719768600468e-05, - "loss": 1.435, - "step": 175 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 3.211747035384548, - "learning_rate": 1.964527356835253e-05, - "loss": 1.4369, - "step": 176 - }, - { - "epoch": 0.22634271099744246, - "grad_norm": 2.128202014808127, - "learning_rate": 1.9639786002156884e-05, - "loss": 0.7571, - "step": 177 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 2.9553922212919286, - "learning_rate": 1.9634257093548204e-05, - "loss": 1.418, - "step": 178 - }, - { - "epoch": 0.2289002557544757, - "grad_norm": 1.7959130863676256, - "learning_rate": 1.962868686623847e-05, - "loss": 0.6977, - "step": 179 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 1.89133481453925, - "learning_rate": 1.9623075344116872e-05, - "loss": 0.7617, - "step": 180 - }, - { - "epoch": 0.23145780051150894, - "grad_norm": 3.2674236800911034, - "learning_rate": 1.961742255124969e-05, - "loss": 1.4268, - "step": 181 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 1.9178586388575092, - "learning_rate": 1.961172851188022e-05, - "loss": 0.7656, - "step": 182 - }, - { - "epoch": 0.2340153452685422, - "grad_norm": 2.0886631755497733, - "learning_rate": 1.960599325042863e-05, - "loss": 0.7879, - "step": 183 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 3.3018517145882784, - "learning_rate": 1.9600216791491912e-05, - "loss": 1.4034, - "step": 184 - }, - { - "epoch": 0.23657289002557544, - "grad_norm": 2.212391671820536, - "learning_rate": 1.9594399159843703e-05, - "loss": 0.7659, - "step": 185 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 3.2586613686439367, - "learning_rate": 1.9588540380434254e-05, - "loss": 1.3989, - "step": 186 - }, - { - "epoch": 0.2391304347826087, - "grad_norm": 3.018017953889818, - "learning_rate": 1.9582640478390264e-05, - "loss": 1.3343, - "step": 187 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 2.894977545329943, - "learning_rate": 1.9576699479014803e-05, - "loss": 1.3979, - "step": 188 - }, - { - "epoch": 0.24168797953964194, - "grad_norm": 3.1270755714342324, - "learning_rate": 1.95707174077872e-05, - "loss": 1.4277, - "step": 189 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 3.041629488429424, - "learning_rate": 1.9564694290362928e-05, - "loss": 1.3781, - "step": 190 - }, - { - "epoch": 0.2442455242966752, - "grad_norm": 3.2096247808436673, - "learning_rate": 1.955863015257349e-05, - "loss": 1.3929, - "step": 191 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 3.5006429954692075, - "learning_rate": 1.9552525020426323e-05, - "loss": 1.3482, - "step": 192 - }, - { - "epoch": 0.24680306905370844, - "grad_norm": 1.9627997158035808, - "learning_rate": 1.9546378920104673e-05, - "loss": 0.6996, - "step": 193 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 3.300741229494114, - "learning_rate": 1.954019187796749e-05, - "loss": 1.3782, - "step": 194 - }, - { - "epoch": 0.24936061381074168, - "grad_norm": 3.153249572313123, - "learning_rate": 1.9533963920549307e-05, - "loss": 1.4155, - "step": 195 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 3.174885241307583, - "learning_rate": 1.9527695074560135e-05, - "loss": 1.3703, - "step": 196 - }, - { - "epoch": 0.25191815856777494, - "grad_norm": 3.4355085299567434, - "learning_rate": 1.952138536688535e-05, - "loss": 1.4089, - "step": 197 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 3.205923827900231, - "learning_rate": 1.9515034824585556e-05, - "loss": 1.3317, - "step": 198 - }, - { - "epoch": 0.2544757033248082, - "grad_norm": 3.084818108230084, - "learning_rate": 1.9508643474896505e-05, - "loss": 1.4235, - "step": 199 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 3.1032482966862456, - "learning_rate": 1.9502211345228957e-05, - "loss": 1.3632, - "step": 200 - }, - { - "epoch": 0.2570332480818414, - "grad_norm": 2.9560013949755595, - "learning_rate": 1.9495738463168553e-05, - "loss": 1.413, - "step": 201 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 3.0068507848307253, - "learning_rate": 1.948922485647572e-05, - "loss": 1.3235, - "step": 202 - }, - { - "epoch": 0.2595907928388747, - "grad_norm": 3.077282894212275, - "learning_rate": 1.948267055308555e-05, - "loss": 1.3268, - "step": 203 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 2.7314930386793965, - "learning_rate": 1.9476075581107644e-05, - "loss": 1.3444, - "step": 204 - }, - { - "epoch": 0.26214833759590794, - "grad_norm": 3.2140242505407106, - "learning_rate": 1.9469439968826057e-05, - "loss": 1.3195, - "step": 205 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 2.0870215509742276, - "learning_rate": 1.9462763744699114e-05, - "loss": 0.6763, - "step": 206 - }, - { - "epoch": 0.2647058823529412, - "grad_norm": 3.133399713306804, - "learning_rate": 1.9456046937359315e-05, - "loss": 1.343, - "step": 207 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 1.731961727384944, - "learning_rate": 1.944928957561322e-05, - "loss": 0.6752, - "step": 208 - }, - { - "epoch": 0.2672634271099744, - "grad_norm": 3.217792007840293, - "learning_rate": 1.9442491688441306e-05, - "loss": 1.3977, - "step": 209 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 3.1751863652047687, - "learning_rate": 1.9435653304997857e-05, - "loss": 1.4129, - "step": 210 - }, - { - "epoch": 0.26982097186700765, - "grad_norm": 3.0323597312468915, - "learning_rate": 1.9428774454610845e-05, - "loss": 1.351, - "step": 211 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 2.934803996025642, - "learning_rate": 1.9421855166781768e-05, - "loss": 1.3721, - "step": 212 - }, - { - "epoch": 0.27237851662404094, - "grad_norm": 2.7742870839855995, - "learning_rate": 1.9414895471185576e-05, - "loss": 1.3681, - "step": 213 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 3.0365421305118114, - "learning_rate": 1.94078953976705e-05, - "loss": 1.3593, - "step": 214 - }, - { - "epoch": 0.2749360613810742, - "grad_norm": 2.815318976466203, - "learning_rate": 1.9400854976257947e-05, - "loss": 1.332, - "step": 215 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 2.877605848343663, - "learning_rate": 1.9393774237142364e-05, - "loss": 1.3824, - "step": 216 - }, - { - "epoch": 0.2774936061381074, - "grad_norm": 2.253259044017352, - "learning_rate": 1.9386653210691107e-05, - "loss": 0.8557, - "step": 217 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 3.212147646034573, - "learning_rate": 1.937949192744432e-05, - "loss": 1.3928, - "step": 218 - }, - { - "epoch": 0.28005115089514065, - "grad_norm": 3.3632522828428715, - "learning_rate": 1.93722904181148e-05, - "loss": 1.3786, - "step": 219 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 3.0844557376464135, - "learning_rate": 1.9365048713587843e-05, - "loss": 1.4052, - "step": 220 - }, - { - "epoch": 0.2826086956521739, - "grad_norm": 2.8254804408758445, - "learning_rate": 1.9357766844921152e-05, - "loss": 1.3673, - "step": 221 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 3.04648056796111, - "learning_rate": 1.935044484334468e-05, - "loss": 1.4374, - "step": 222 - }, - { - "epoch": 0.2851662404092072, - "grad_norm": 3.1600134639498036, - "learning_rate": 1.9343082740260482e-05, - "loss": 1.4229, - "step": 223 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 2.245622746383177, - "learning_rate": 1.933568056724262e-05, - "loss": 0.7877, - "step": 224 - }, - { - "epoch": 0.2877237851662404, - "grad_norm": 2.0394230953580785, - "learning_rate": 1.9328238356036994e-05, - "loss": 0.7063, - "step": 225 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 2.981731343878721, - "learning_rate": 1.932075613856122e-05, - "loss": 1.3904, - "step": 226 - }, - { - "epoch": 0.29028132992327366, - "grad_norm": 2.9636221831570078, - "learning_rate": 1.9313233946904478e-05, - "loss": 1.3619, - "step": 227 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 2.976355974166306, - "learning_rate": 1.930567181332741e-05, - "loss": 1.341, - "step": 228 - }, - { - "epoch": 0.2928388746803069, - "grad_norm": 2.4821209890466736, - "learning_rate": 1.9298069770261936e-05, - "loss": 0.7115, - "step": 229 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 3.0174732909424984, - "learning_rate": 1.929042785031115e-05, - "loss": 1.3114, - "step": 230 - }, - { - "epoch": 0.29539641943734013, - "grad_norm": 3.03260333259794, - "learning_rate": 1.9282746086249157e-05, - "loss": 1.3859, - "step": 231 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 2.1435498845483165, - "learning_rate": 1.927502451102095e-05, - "loss": 0.7957, - "step": 232 - }, - { - "epoch": 0.2979539641943734, - "grad_norm": 2.1253871728895164, - "learning_rate": 1.926726315774226e-05, - "loss": 0.783, - "step": 233 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 3.05819122255675, - "learning_rate": 1.9259462059699414e-05, - "loss": 1.4073, - "step": 234 - }, - { - "epoch": 0.30051150895140666, - "grad_norm": 3.297873679186644, - "learning_rate": 1.9251621250349198e-05, - "loss": 1.4092, - "step": 235 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 3.159195245832563, - "learning_rate": 1.9243740763318695e-05, - "loss": 1.3812, - "step": 236 - }, - { - "epoch": 0.3030690537084399, - "grad_norm": 3.0570812155066878, - "learning_rate": 1.9235820632405174e-05, - "loss": 1.3645, - "step": 237 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 2.8695065783061935, - "learning_rate": 1.9227860891575916e-05, - "loss": 1.4179, - "step": 238 - }, - { - "epoch": 0.30562659846547313, - "grad_norm": 3.088694013864649, - "learning_rate": 1.921986157496807e-05, - "loss": 1.3774, - "step": 239 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 3.3325289064988883, - "learning_rate": 1.9211822716888532e-05, - "loss": 1.3522, - "step": 240 - }, - { - "epoch": 0.30818414322250637, - "grad_norm": 2.889477275932663, - "learning_rate": 1.9203744351813767e-05, - "loss": 1.4216, - "step": 241 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 2.909014159790925, - "learning_rate": 1.9195626514389682e-05, - "loss": 1.3958, - "step": 242 - }, - { - "epoch": 0.31074168797953966, - "grad_norm": 2.9835236117531463, - "learning_rate": 1.9187469239431468e-05, - "loss": 1.3373, - "step": 243 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 2.910913994859779, - "learning_rate": 1.917927256192345e-05, - "loss": 1.3075, - "step": 244 - }, - { - "epoch": 0.3132992327365729, - "grad_norm": 3.04655385846043, - "learning_rate": 1.9171036517018945e-05, - "loss": 1.3298, - "step": 245 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 3.1623035671209894, - "learning_rate": 1.91627611400401e-05, - "loss": 1.444, - "step": 246 - }, - { - "epoch": 0.31585677749360613, - "grad_norm": 3.036324362053659, - "learning_rate": 1.915444646647775e-05, - "loss": 1.3619, - "step": 247 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 3.080692304538432, - "learning_rate": 1.9146092531991268e-05, - "loss": 1.3729, - "step": 248 - }, - { - "epoch": 0.31841432225063937, - "grad_norm": 3.2358571490879027, - "learning_rate": 1.913769937240839e-05, - "loss": 1.3943, - "step": 249 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 3.1402460901787537, - "learning_rate": 1.91292670237251e-05, - "loss": 1.3461, - "step": 250 - }, - { - "epoch": 0.3209718670076726, - "grad_norm": 2.800863863734151, - "learning_rate": 1.9120795522105435e-05, - "loss": 1.3355, - "step": 251 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 2.8915907784116683, - "learning_rate": 1.911228490388136e-05, - "loss": 1.4295, - "step": 252 - }, - { - "epoch": 0.3235294117647059, - "grad_norm": 3.0646101756352837, - "learning_rate": 1.9103735205552586e-05, - "loss": 1.377, - "step": 253 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 2.8420899393492745, - "learning_rate": 1.9095146463786448e-05, - "loss": 1.3519, - "step": 254 - }, - { - "epoch": 0.32608695652173914, - "grad_norm": 3.090069809101474, - "learning_rate": 1.908651871541771e-05, - "loss": 1.3946, - "step": 255 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 2.901016341747405, - "learning_rate": 1.9077851997448433e-05, - "loss": 1.3479, - "step": 256 - }, - { - "epoch": 0.32864450127877237, - "grad_norm": 3.0622407667688982, - "learning_rate": 1.906914634704781e-05, - "loss": 0.8058, - "step": 257 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 3.2951513953148925, - "learning_rate": 1.9060401801551995e-05, - "loss": 1.3661, - "step": 258 - }, - { - "epoch": 0.3312020460358056, - "grad_norm": 2.864903423136103, - "learning_rate": 1.9051618398463965e-05, - "loss": 1.3033, - "step": 259 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 3.118151033396187, - "learning_rate": 1.9042796175453337e-05, - "loss": 1.3544, - "step": 260 - }, - { - "epoch": 0.3337595907928389, - "grad_norm": 2.9541510816402132, - "learning_rate": 1.903393517035622e-05, - "loss": 1.388, - "step": 261 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 2.9846212221688826, - "learning_rate": 1.9025035421175045e-05, - "loss": 1.3444, - "step": 262 - }, - { - "epoch": 0.33631713554987214, - "grad_norm": 2.748685751470814, - "learning_rate": 1.9016096966078415e-05, - "loss": 1.262, - "step": 263 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 1.86422677296239, - "learning_rate": 1.9007119843400926e-05, - "loss": 0.7722, - "step": 264 - }, - { - "epoch": 0.3388746803069054, - "grad_norm": 1.979013050810319, - "learning_rate": 1.8998104091643e-05, - "loss": 0.7763, - "step": 265 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 3.625495933267479, - "learning_rate": 1.898904974947075e-05, - "loss": 1.4364, - "step": 266 - }, - { - "epoch": 0.3414322250639386, - "grad_norm": 2.917372432040597, - "learning_rate": 1.8979956855715764e-05, - "loss": 1.3252, - "step": 267 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 2.974811235849606, - "learning_rate": 1.8970825449375e-05, - "loss": 1.3412, - "step": 268 - }, - { - "epoch": 0.34398976982097185, - "grad_norm": 3.0841239887395657, - "learning_rate": 1.8961655569610557e-05, - "loss": 1.3918, - "step": 269 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 2.950904071527663, - "learning_rate": 1.8952447255749557e-05, - "loss": 1.3527, - "step": 270 - }, - { - "epoch": 0.34654731457800514, - "grad_norm": 3.262066207388626, - "learning_rate": 1.894320054728394e-05, - "loss": 1.3668, - "step": 271 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 3.050450940790126, - "learning_rate": 1.8933915483870322e-05, - "loss": 1.3921, - "step": 272 - }, - { - "epoch": 0.3491048593350384, - "grad_norm": 3.00405701278293, - "learning_rate": 1.8924592105329807e-05, - "loss": 1.3804, - "step": 273 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 3.019661947310167, - "learning_rate": 1.8915230451647817e-05, - "loss": 1.4127, - "step": 274 - }, - { - "epoch": 0.3516624040920716, - "grad_norm": 3.043695713405263, - "learning_rate": 1.890583056297394e-05, - "loss": 1.379, - "step": 275 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 2.903001789581696, - "learning_rate": 1.8896392479621726e-05, - "loss": 1.4774, - "step": 276 - }, - { - "epoch": 0.35421994884910485, - "grad_norm": 2.789445523748627, - "learning_rate": 1.8886916242068546e-05, - "loss": 0.7663, - "step": 277 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 2.8769897912972606, - "learning_rate": 1.8877401890955396e-05, - "loss": 1.4273, - "step": 278 - }, - { - "epoch": 0.3567774936061381, - "grad_norm": 2.7965390724935983, - "learning_rate": 1.8867849467086734e-05, - "loss": 1.3438, - "step": 279 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 2.968868292357309, - "learning_rate": 1.8858259011430303e-05, - "loss": 1.3504, - "step": 280 - }, - { - "epoch": 0.3593350383631714, - "grad_norm": 2.5531974952546874, - "learning_rate": 1.8848630565116947e-05, - "loss": 0.7606, - "step": 281 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 2.1975754094291315, - "learning_rate": 1.883896416944045e-05, - "loss": 0.732, - "step": 282 - }, - { - "epoch": 0.3618925831202046, - "grad_norm": 3.3995731693535176, - "learning_rate": 1.8829259865857344e-05, - "loss": 1.3999, - "step": 283 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 3.430200308194537, - "learning_rate": 1.881951769598674e-05, - "loss": 1.3248, - "step": 284 - }, - { - "epoch": 0.36445012787723785, - "grad_norm": 3.2649576392044164, - "learning_rate": 1.880973770161015e-05, - "loss": 1.3239, - "step": 285 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 3.269378830680398, - "learning_rate": 1.8799919924671304e-05, - "loss": 1.383, - "step": 286 - }, - { - "epoch": 0.3670076726342711, - "grad_norm": 3.2311558697186213, - "learning_rate": 1.879006440727596e-05, - "loss": 1.352, - "step": 287 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 2.9522333319898637, - "learning_rate": 1.878017119169176e-05, - "loss": 1.3284, - "step": 288 - }, - { - "epoch": 0.3695652173913043, - "grad_norm": 3.386089089995496, - "learning_rate": 1.8770240320347994e-05, - "loss": 1.39, - "step": 289 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 2.6362203207277193, - "learning_rate": 1.8760271835835468e-05, - "loss": 0.7361, - "step": 290 - }, - { - "epoch": 0.3721227621483376, - "grad_norm": 2.821112391122417, - "learning_rate": 1.8750265780906288e-05, - "loss": 1.3135, - "step": 291 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 3.008745329116554, - "learning_rate": 1.8740222198473698e-05, - "loss": 1.3451, - "step": 292 - }, - { - "epoch": 0.37468030690537085, - "grad_norm": 2.9994367215810587, - "learning_rate": 1.8730141131611882e-05, - "loss": 1.4245, - "step": 293 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 3.111917765714286, - "learning_rate": 1.872002262355579e-05, - "loss": 1.4097, - "step": 294 - }, - { - "epoch": 0.3772378516624041, - "grad_norm": 2.768998687481, - "learning_rate": 1.870986671770094e-05, - "loss": 1.3279, - "step": 295 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 2.9883867537300546, - "learning_rate": 1.869967345760324e-05, - "loss": 1.4347, - "step": 296 - }, - { - "epoch": 0.3797953964194373, - "grad_norm": 2.7847532963171036, - "learning_rate": 1.8689442886978807e-05, - "loss": 1.368, - "step": 297 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 2.287634988767796, - "learning_rate": 1.867917504970377e-05, - "loss": 0.7502, - "step": 298 - }, - { - "epoch": 0.38235294117647056, - "grad_norm": 3.071713582821489, - "learning_rate": 1.8668869989814074e-05, - "loss": 1.3937, - "step": 299 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 3.2696234365281303, - "learning_rate": 1.865852775150532e-05, - "loss": 1.3864, - "step": 300 - }, - { - "epoch": 0.38491048593350385, - "grad_norm": 2.4765375493456254, - "learning_rate": 1.8648148379132537e-05, - "loss": 1.2419, - "step": 301 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 1.890926149763113, - "learning_rate": 1.8637731917210034e-05, - "loss": 0.7822, - "step": 302 - }, - { - "epoch": 0.3874680306905371, - "grad_norm": 2.6890473436938245, - "learning_rate": 1.8627278410411166e-05, - "loss": 1.3367, - "step": 303 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 1.9387340051259037, - "learning_rate": 1.8616787903568177e-05, - "loss": 0.7892, - "step": 304 - }, - { - "epoch": 0.3900255754475703, - "grad_norm": 2.9547114029549197, - "learning_rate": 1.8606260441671987e-05, - "loss": 1.3995, - "step": 305 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 1.8473179875227623, - "learning_rate": 1.8595696069872013e-05, - "loss": 0.779, - "step": 306 - }, - { - "epoch": 0.39258312020460356, - "grad_norm": 2.707756582423239, - "learning_rate": 1.858509483347596e-05, - "loss": 1.4007, - "step": 307 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 2.7511983477985638, - "learning_rate": 1.8574456777949644e-05, - "loss": 1.3685, - "step": 308 - }, - { - "epoch": 0.39514066496163686, - "grad_norm": 2.8881335012329217, - "learning_rate": 1.856378194891678e-05, - "loss": 1.4164, - "step": 309 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 2.9810328503686456, - "learning_rate": 1.8553070392158797e-05, - "loss": 1.3752, - "step": 310 - }, - { - "epoch": 0.3976982097186701, - "grad_norm": 2.9063989685465357, - "learning_rate": 1.8542322153614647e-05, - "loss": 1.3252, - "step": 311 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 2.6894638610615607, - "learning_rate": 1.853153727938059e-05, - "loss": 1.341, - "step": 312 - }, - { - "epoch": 0.40025575447570333, - "grad_norm": 2.8505968821815326, - "learning_rate": 1.8520715815710006e-05, - "loss": 1.3737, - "step": 313 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 2.73468540143606, - "learning_rate": 1.850985780901321e-05, - "loss": 1.3865, - "step": 314 - }, - { - "epoch": 0.40281329923273657, - "grad_norm": 2.07049892992539, - "learning_rate": 1.849896330585722e-05, - "loss": 0.7007, - "step": 315 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 3.0270160670892112, - "learning_rate": 1.84880323529656e-05, - "loss": 1.3479, - "step": 316 - }, - { - "epoch": 0.4053708439897698, - "grad_norm": 3.3554468042761787, - "learning_rate": 1.8477064997218216e-05, - "loss": 1.3683, - "step": 317 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 1.9870713588591018, - "learning_rate": 1.8466061285651076e-05, - "loss": 0.676, - "step": 318 - }, - { - "epoch": 0.4079283887468031, - "grad_norm": 3.0408614274349834, - "learning_rate": 1.8455021265456086e-05, - "loss": 1.3422, - "step": 319 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 1.6762652361451105, - "learning_rate": 1.8443944983980894e-05, - "loss": 0.7421, - "step": 320 - }, - { - "epoch": 0.41048593350383633, - "grad_norm": 2.896879345949577, - "learning_rate": 1.843283248872864e-05, - "loss": 1.4102, - "step": 321 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 2.801545845277934, - "learning_rate": 1.8421683827357792e-05, - "loss": 1.3845, - "step": 322 - }, - { - "epoch": 0.41304347826086957, - "grad_norm": 2.835023543114968, - "learning_rate": 1.8410499047681917e-05, - "loss": 1.4036, - "step": 323 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 2.7946781351369228, - "learning_rate": 1.8399278197669475e-05, - "loss": 1.3894, - "step": 324 - }, - { - "epoch": 0.4156010230179028, - "grad_norm": 2.8041197780444946, - "learning_rate": 1.8388021325443646e-05, - "loss": 1.3671, - "step": 325 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 3.1285917762159707, - "learning_rate": 1.8376728479282077e-05, - "loss": 1.3706, - "step": 326 - }, - { - "epoch": 0.41815856777493604, - "grad_norm": 2.1926274081372057, - "learning_rate": 1.8365399707616707e-05, - "loss": 0.7312, - "step": 327 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 1.9473773309310831, - "learning_rate": 1.8354035059033544e-05, - "loss": 0.7357, - "step": 328 - }, - { - "epoch": 0.42071611253196933, - "grad_norm": 3.2091084376923704, - "learning_rate": 1.8342634582272473e-05, - "loss": 1.3652, - "step": 329 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 1.802250991629747, - "learning_rate": 1.8331198326227024e-05, - "loss": 0.7261, - "step": 330 - }, - { - "epoch": 0.42327365728900257, - "grad_norm": 3.321626361991033, - "learning_rate": 1.8319726339944183e-05, - "loss": 1.3583, - "step": 331 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 2.9290192831814035, - "learning_rate": 1.830821867262417e-05, - "loss": 1.4052, - "step": 332 - }, - { - "epoch": 0.4258312020460358, - "grad_norm": 2.783056777181697, - "learning_rate": 1.8296675373620228e-05, - "loss": 1.383, - "step": 333 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 3.1353593222826737, - "learning_rate": 1.8285096492438424e-05, - "loss": 1.3905, - "step": 334 - }, - { - "epoch": 0.42838874680306904, - "grad_norm": 2.8471283504046876, - "learning_rate": 1.8273482078737416e-05, - "loss": 1.3899, - "step": 335 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 2.6215682593789693, - "learning_rate": 1.826183218232826e-05, - "loss": 1.3503, - "step": 336 - }, - { - "epoch": 0.4309462915601023, - "grad_norm": 3.1227705108973396, - "learning_rate": 1.8250146853174186e-05, - "loss": 1.4316, - "step": 337 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 2.7320294828779565, - "learning_rate": 1.8238426141390378e-05, - "loss": 1.2904, - "step": 338 - }, - { - "epoch": 0.43350383631713557, - "grad_norm": 2.666504064842278, - "learning_rate": 1.8226670097243774e-05, - "loss": 1.3788, - "step": 339 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 2.6721441011013387, - "learning_rate": 1.8214878771152843e-05, - "loss": 1.3318, - "step": 340 - }, - { - "epoch": 0.4360613810741688, - "grad_norm": 2.5424693012624417, - "learning_rate": 1.8203052213687372e-05, - "loss": 1.2335, - "step": 341 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 2.728699138845062, - "learning_rate": 1.819119047556823e-05, - "loss": 1.3614, - "step": 342 - }, - { - "epoch": 0.43861892583120204, - "grad_norm": 3.579036840162622, - "learning_rate": 1.8179293607667177e-05, - "loss": 0.8223, - "step": 343 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 2.907166594259299, - "learning_rate": 1.8167361661006645e-05, - "loss": 1.3449, - "step": 344 - }, - { - "epoch": 0.4411764705882353, - "grad_norm": 2.973457349796427, - "learning_rate": 1.815539468675949e-05, - "loss": 1.3621, - "step": 345 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 2.7032378256480345, - "learning_rate": 1.8143392736248805e-05, - "loss": 1.3751, - "step": 346 - }, - { - "epoch": 0.4437340153452685, - "grad_norm": 2.6218400450741206, - "learning_rate": 1.813135586094768e-05, - "loss": 1.2616, - "step": 347 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 2.860791566254861, - "learning_rate": 1.811928411247899e-05, - "loss": 1.3673, - "step": 348 - }, - { - "epoch": 0.4462915601023018, - "grad_norm": 2.9769109412230064, - "learning_rate": 1.8107177542615173e-05, - "loss": 1.3935, - "step": 349 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 2.7851222654196155, - "learning_rate": 1.8095036203278008e-05, - "loss": 1.4, - "step": 350 - }, - { - "epoch": 0.44884910485933505, - "grad_norm": 3.0652314468253152, - "learning_rate": 1.808286014653838e-05, - "loss": 1.2933, - "step": 351 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 2.732314603458855, - "learning_rate": 1.8070649424616085e-05, - "loss": 1.3735, - "step": 352 - }, - { - "epoch": 0.4514066496163683, - "grad_norm": 2.7648687354546264, - "learning_rate": 1.8058404089879575e-05, - "loss": 1.2951, - "step": 353 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 3.153236211079857, - "learning_rate": 1.8046124194845746e-05, - "loss": 1.4376, - "step": 354 - }, - { - "epoch": 0.4539641943734015, - "grad_norm": 2.7935090434621603, - "learning_rate": 1.8033809792179725e-05, - "loss": 1.3273, - "step": 355 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 3.183493343483552, - "learning_rate": 1.8021460934694627e-05, - "loss": 0.8372, - "step": 356 - }, - { - "epoch": 0.45652173913043476, - "grad_norm": 3.1584645290656623, - "learning_rate": 1.800907767535133e-05, - "loss": 1.3353, - "step": 357 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 2.3601023560111556, - "learning_rate": 1.7996660067258255e-05, - "loss": 0.7984, - "step": 358 - }, - { - "epoch": 0.45907928388746805, - "grad_norm": 3.5245610447047335, - "learning_rate": 1.798420816367114e-05, - "loss": 1.4142, - "step": 359 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 3.3965171238348733, - "learning_rate": 1.7971722017992806e-05, - "loss": 1.3348, - "step": 360 - }, - { - "epoch": 0.4616368286445013, - "grad_norm": 2.8633260475943203, - "learning_rate": 1.795920168377292e-05, - "loss": 1.3215, - "step": 361 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 2.8664483434396866, - "learning_rate": 1.794664721470778e-05, - "loss": 1.3303, - "step": 362 - }, - { - "epoch": 0.4641943734015345, - "grad_norm": 3.120499221073911, - "learning_rate": 1.7934058664640086e-05, - "loss": 1.4127, - "step": 363 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 2.804091143739762, - "learning_rate": 1.7921436087558693e-05, - "loss": 1.3614, - "step": 364 - }, - { - "epoch": 0.46675191815856776, - "grad_norm": 2.883033845742498, - "learning_rate": 1.7908779537598387e-05, - "loss": 1.3446, - "step": 365 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 3.0572509767112375, - "learning_rate": 1.789608906903967e-05, - "loss": 1.3469, - "step": 366 - }, - { - "epoch": 0.46930946291560105, - "grad_norm": 2.8694470857054903, - "learning_rate": 1.788336473630849e-05, - "loss": 1.3745, - "step": 367 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 1.9935448557363311, - "learning_rate": 1.787060659397604e-05, - "loss": 0.7357, - "step": 368 - }, - { - "epoch": 0.4718670076726343, - "grad_norm": 1.7972461585733766, - "learning_rate": 1.7857814696758523e-05, - "loss": 0.7566, - "step": 369 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 3.550657242834858, - "learning_rate": 1.7844989099516884e-05, - "loss": 1.4061, - "step": 370 - }, - { - "epoch": 0.4744245524296675, - "grad_norm": 3.0065037193460022, - "learning_rate": 1.783212985725662e-05, - "loss": 1.3401, - "step": 371 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 2.620582507750628, - "learning_rate": 1.7819237025127512e-05, - "loss": 1.351, - "step": 372 - }, - { - "epoch": 0.47698209718670076, - "grad_norm": 2.8939184931783117, - "learning_rate": 1.7806310658423403e-05, - "loss": 1.3781, - "step": 373 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 3.318167300458692, - "learning_rate": 1.779335081258195e-05, - "loss": 1.3802, - "step": 374 - }, - { - "epoch": 0.479539641943734, - "grad_norm": 2.7897414118602106, - "learning_rate": 1.7780357543184396e-05, - "loss": 1.3366, - "step": 375 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 2.9912188006416542, - "learning_rate": 1.7767330905955334e-05, - "loss": 1.3298, - "step": 376 - }, - { - "epoch": 0.4820971867007673, - "grad_norm": 3.1397178349278465, - "learning_rate": 1.775427095676246e-05, - "loss": 1.3751, - "step": 377 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 3.01343344886303, - "learning_rate": 1.7741177751616328e-05, - "loss": 1.3857, - "step": 378 - }, - { - "epoch": 0.4846547314578005, - "grad_norm": 2.77621953307887, - "learning_rate": 1.7728051346670128e-05, - "loss": 1.3132, - "step": 379 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 3.087752232866755, - "learning_rate": 1.7714891798219432e-05, - "loss": 1.3484, - "step": 380 - }, - { - "epoch": 0.48721227621483376, - "grad_norm": 2.1956952895410473, - "learning_rate": 1.770169916270195e-05, - "loss": 0.7343, - "step": 381 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 2.954394442835975, - "learning_rate": 1.76884734966973e-05, - "loss": 1.3529, - "step": 382 - }, - { - "epoch": 0.489769820971867, - "grad_norm": 2.8291955947433904, - "learning_rate": 1.767521485692675e-05, - "loss": 1.284, - "step": 383 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 3.164828098443421, - "learning_rate": 1.7661923300253004e-05, - "loss": 1.4013, - "step": 384 - }, - { - "epoch": 0.49232736572890023, - "grad_norm": 3.216800554081895, - "learning_rate": 1.764859888367991e-05, - "loss": 1.3067, - "step": 385 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 2.8809425798376997, - "learning_rate": 1.7635241664352266e-05, - "loss": 1.3347, - "step": 386 - }, - { - "epoch": 0.4948849104859335, - "grad_norm": 1.712455659732595, - "learning_rate": 1.7621851699555533e-05, - "loss": 0.6606, - "step": 387 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 3.083587969622726, - "learning_rate": 1.760842904671563e-05, - "loss": 1.3142, - "step": 388 - }, - { - "epoch": 0.49744245524296676, - "grad_norm": 1.8670600000962494, - "learning_rate": 1.759497376339864e-05, - "loss": 0.7515, - "step": 389 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 2.7579029155987724, - "learning_rate": 1.7581485907310617e-05, - "loss": 1.2916, - "step": 390 - }, - { - "epoch": 0.5, - "grad_norm": 3.0656588420375614, - "learning_rate": 1.756796553629729e-05, - "loss": 1.3846, - "step": 391 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 2.7898405141041915, - "learning_rate": 1.7554412708343844e-05, - "loss": 1.3144, - "step": 392 - }, - { - "epoch": 0.5025575447570333, - "grad_norm": 3.3630230491058333, - "learning_rate": 1.7540827481574657e-05, - "loss": 1.3243, - "step": 393 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 2.8919646341211935, - "learning_rate": 1.7527209914253074e-05, - "loss": 1.369, - "step": 394 - }, - { - "epoch": 0.5051150895140665, - "grad_norm": 1.894837736773219, - "learning_rate": 1.7513560064781115e-05, - "loss": 0.7469, - "step": 395 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 3.374014320848714, - "learning_rate": 1.7499877991699268e-05, - "loss": 1.3117, - "step": 396 - }, - { - "epoch": 0.5076726342710998, - "grad_norm": 3.5995364542851322, - "learning_rate": 1.7486163753686207e-05, - "loss": 1.3416, - "step": 397 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 1.726430945349769, - "learning_rate": 1.7472417409558565e-05, - "loss": 0.7147, - "step": 398 - }, - { - "epoch": 0.510230179028133, - "grad_norm": 2.8763242591931855, - "learning_rate": 1.745863901827066e-05, - "loss": 1.3818, - "step": 399 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 2.9729873308958124, - "learning_rate": 1.7444828638914254e-05, - "loss": 1.3436, - "step": 400 - }, - { - "epoch": 0.5127877237851662, - "grad_norm": 2.970980065179655, - "learning_rate": 1.7430986330718296e-05, - "loss": 1.3788, - "step": 401 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 2.9675937110580892, - "learning_rate": 1.7417112153048675e-05, - "loss": 1.3997, - "step": 402 - }, - { - "epoch": 0.5153452685421995, - "grad_norm": 3.228251816068922, - "learning_rate": 1.740320616540795e-05, - "loss": 1.3789, - "step": 403 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 3.01857984379334, - "learning_rate": 1.7389268427435114e-05, - "loss": 1.3283, - "step": 404 - }, - { - "epoch": 0.5179028132992327, - "grad_norm": 1.882169112641596, - "learning_rate": 1.7375298998905322e-05, - "loss": 0.6901, - "step": 405 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 2.6548662295556427, - "learning_rate": 1.736129793972964e-05, - "loss": 1.3534, - "step": 406 - }, - { - "epoch": 0.520460358056266, - "grad_norm": 2.5640013386006393, - "learning_rate": 1.7347265309954793e-05, - "loss": 1.3362, - "step": 407 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 2.801573578111598, - "learning_rate": 1.733320116976291e-05, - "loss": 1.3046, - "step": 408 - }, - { - "epoch": 0.5230179028132992, - "grad_norm": 2.786513209344725, - "learning_rate": 1.7319105579471247e-05, - "loss": 1.3601, - "step": 409 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 2.898756059962813, - "learning_rate": 1.7304978599531944e-05, - "loss": 1.4105, - "step": 410 - }, - { - "epoch": 0.5255754475703325, - "grad_norm": 2.855799976995279, - "learning_rate": 1.7290820290531764e-05, - "loss": 1.3265, - "step": 411 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 2.663603592053163, - "learning_rate": 1.7276630713191843e-05, - "loss": 1.3536, - "step": 412 - }, - { - "epoch": 0.5281329923273658, - "grad_norm": 2.7694740802703746, - "learning_rate": 1.72624099283674e-05, - "loss": 1.2832, - "step": 413 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 2.704340690972957, - "learning_rate": 1.7248157997047498e-05, - "loss": 1.3514, - "step": 414 - }, - { - "epoch": 0.530690537084399, - "grad_norm": 2.8167583966966605, - "learning_rate": 1.7233874980354795e-05, - "loss": 1.3901, - "step": 415 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 2.806695040681416, - "learning_rate": 1.7219560939545246e-05, - "loss": 1.2676, - "step": 416 - }, - { - "epoch": 0.5332480818414322, - "grad_norm": 2.5124408375210128, - "learning_rate": 1.720521593600787e-05, - "loss": 1.3436, - "step": 417 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 1.9848279350964533, - "learning_rate": 1.7190840031264478e-05, - "loss": 0.7619, - "step": 418 - }, - { - "epoch": 0.5358056265984654, - "grad_norm": 2.8279724723593476, - "learning_rate": 1.71764332869694e-05, - "loss": 1.3311, - "step": 419 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 3.0309460277874316, - "learning_rate": 1.7161995764909236e-05, - "loss": 1.3058, - "step": 420 - }, - { - "epoch": 0.5383631713554987, - "grad_norm": 2.730982731804814, - "learning_rate": 1.714752752700258e-05, - "loss": 1.3487, - "step": 421 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 2.6772991528613383, - "learning_rate": 1.713302863529976e-05, - "loss": 1.333, - "step": 422 - }, - { - "epoch": 0.540920716112532, - "grad_norm": 2.58819018657764, - "learning_rate": 1.711849915198256e-05, - "loss": 1.308, - "step": 423 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 2.9712832695914626, - "learning_rate": 1.7103939139363978e-05, - "loss": 1.3352, - "step": 424 - }, - { - "epoch": 0.5434782608695652, - "grad_norm": 2.7013763258744086, - "learning_rate": 1.708934865988794e-05, - "loss": 1.3588, - "step": 425 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 2.810526483043792, - "learning_rate": 1.7074727776129027e-05, - "loss": 1.2475, - "step": 426 - }, - { - "epoch": 0.5460358056265985, - "grad_norm": 2.8719920971110713, - "learning_rate": 1.706007655079222e-05, - "loss": 1.3783, - "step": 427 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 1.8057442687984455, - "learning_rate": 1.7045395046712638e-05, - "loss": 0.7535, - "step": 428 - }, - { - "epoch": 0.5485933503836317, - "grad_norm": 2.682469914566433, - "learning_rate": 1.703068332685524e-05, - "loss": 1.3076, - "step": 429 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 3.3090190303761498, - "learning_rate": 1.7015941454314584e-05, - "loss": 1.3455, - "step": 430 - }, - { - "epoch": 0.551150895140665, - "grad_norm": 1.7597274307867168, - "learning_rate": 1.700116949231454e-05, - "loss": 0.7356, - "step": 431 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 3.0058863154400424, - "learning_rate": 1.698636750420802e-05, - "loss": 1.3747, - "step": 432 - }, - { - "epoch": 0.5537084398976982, - "grad_norm": 2.989421601043101, - "learning_rate": 1.6971535553476723e-05, - "loss": 1.3958, - "step": 433 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 2.5937039507897315, - "learning_rate": 1.6956673703730828e-05, - "loss": 1.3516, - "step": 434 - }, - { - "epoch": 0.5562659846547314, - "grad_norm": 2.722264759113679, - "learning_rate": 1.6941782018708764e-05, - "loss": 1.3251, - "step": 435 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 2.5056534686322065, - "learning_rate": 1.6926860562276902e-05, - "loss": 1.3253, - "step": 436 - }, - { - "epoch": 0.5588235294117647, - "grad_norm": 2.6909167992940577, - "learning_rate": 1.6911909398429304e-05, - "loss": 1.3518, - "step": 437 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 2.720868489302874, - "learning_rate": 1.6896928591287433e-05, - "loss": 1.3191, - "step": 438 - }, - { - "epoch": 0.5613810741687979, - "grad_norm": 2.715513033351372, - "learning_rate": 1.688191820509988e-05, - "loss": 1.2676, - "step": 439 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 2.8906359935533072, - "learning_rate": 1.6866878304242104e-05, - "loss": 1.3508, - "step": 440 - }, - { - "epoch": 0.5639386189258312, - "grad_norm": 2.647534127563379, - "learning_rate": 1.6851808953216132e-05, - "loss": 1.3538, - "step": 441 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 2.745298737254531, - "learning_rate": 1.6836710216650304e-05, - "loss": 1.3556, - "step": 442 - }, - { - "epoch": 0.5664961636828645, - "grad_norm": 2.093531567743811, - "learning_rate": 1.6821582159298985e-05, - "loss": 0.781, - "step": 443 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 2.8772901217064026, - "learning_rate": 1.6806424846042282e-05, - "loss": 1.2903, - "step": 444 - }, - { - "epoch": 0.5690537084398977, - "grad_norm": 2.553351881422692, - "learning_rate": 1.6791238341885777e-05, - "loss": 1.3309, - "step": 445 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 2.6334311979040623, - "learning_rate": 1.6776022711960252e-05, - "loss": 1.3927, - "step": 446 - }, - { - "epoch": 0.571611253196931, - "grad_norm": 2.6338158570953594, - "learning_rate": 1.676077802152139e-05, - "loss": 1.306, - "step": 447 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 2.6464614566318643, - "learning_rate": 1.6745504335949505e-05, - "loss": 1.3271, - "step": 448 - }, - { - "epoch": 0.5741687979539642, - "grad_norm": 2.5075779797539965, - "learning_rate": 1.6730201720749277e-05, - "loss": 1.3317, - "step": 449 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 2.7762519484963546, - "learning_rate": 1.6714870241549442e-05, - "loss": 1.296, - "step": 450 - }, - { - "epoch": 0.5767263427109974, - "grad_norm": 2.599076153373618, - "learning_rate": 1.669950996410254e-05, - "loss": 1.3368, - "step": 451 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 2.5820859999520804, - "learning_rate": 1.6684120954284608e-05, - "loss": 1.3532, - "step": 452 - }, - { - "epoch": 0.5792838874680307, - "grad_norm": 2.036611756724949, - "learning_rate": 1.666870327809491e-05, - "loss": 0.7212, - "step": 453 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 2.8461132996511997, - "learning_rate": 1.6653257001655652e-05, - "loss": 1.3568, - "step": 454 - }, - { - "epoch": 0.5818414322250639, - "grad_norm": 2.746014629086474, - "learning_rate": 1.6637782191211714e-05, - "loss": 1.3499, - "step": 455 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 1.8428905722561226, - "learning_rate": 1.6622278913130325e-05, - "loss": 0.803, - "step": 456 - }, - { - "epoch": 0.5843989769820972, - "grad_norm": 2.9311121144805936, - "learning_rate": 1.6606747233900816e-05, - "loss": 1.3946, - "step": 457 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 2.684978532096339, - "learning_rate": 1.659118722013433e-05, - "loss": 1.3478, - "step": 458 - }, - { - "epoch": 0.5869565217391305, - "grad_norm": 2.61252297096862, - "learning_rate": 1.6575598938563516e-05, - "loss": 1.3227, - "step": 459 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 2.8502231451073587, - "learning_rate": 1.655998245604227e-05, - "loss": 1.3886, - "step": 460 - }, - { - "epoch": 0.5895140664961637, - "grad_norm": 2.7533987181407515, - "learning_rate": 1.6544337839545418e-05, - "loss": 1.3264, - "step": 461 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 2.7401740662339074, - "learning_rate": 1.652866515616846e-05, - "loss": 1.4464, - "step": 462 - }, - { - "epoch": 0.592071611253197, - "grad_norm": 2.658023430829094, - "learning_rate": 1.6512964473127255e-05, - "loss": 1.3784, - "step": 463 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 1.8236667681255159, - "learning_rate": 1.6497235857757754e-05, - "loss": 0.6942, - "step": 464 - }, - { - "epoch": 0.5946291560102301, - "grad_norm": 2.777051535772785, - "learning_rate": 1.64814793775157e-05, - "loss": 1.3621, - "step": 465 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 2.7079117313077004, - "learning_rate": 1.6465695099976332e-05, - "loss": 1.3141, - "step": 466 - }, - { - "epoch": 0.5971867007672634, - "grad_norm": 2.8290423881736233, - "learning_rate": 1.6449883092834116e-05, - "loss": 1.3198, - "step": 467 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 1.8226010405993645, - "learning_rate": 1.6434043423902443e-05, - "loss": 0.7993, - "step": 468 - }, - { - "epoch": 0.5997442455242967, - "grad_norm": 1.6136433849620568, - "learning_rate": 1.6418176161113323e-05, - "loss": 0.741, - "step": 469 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 2.743386016249407, - "learning_rate": 1.6402281372517127e-05, - "loss": 1.4221, - "step": 470 - }, - { - "epoch": 0.6023017902813299, - "grad_norm": 2.762298242018842, - "learning_rate": 1.6386359126282262e-05, - "loss": 1.3519, - "step": 471 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 2.6443548835151014, - "learning_rate": 1.637040949069491e-05, - "loss": 1.3079, - "step": 472 - }, - { - "epoch": 0.6048593350383632, - "grad_norm": 2.7566822465416116, - "learning_rate": 1.6354432534158693e-05, - "loss": 1.3794, - "step": 473 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 1.8485859485736542, - "learning_rate": 1.633842832519443e-05, - "loss": 0.825, - "step": 474 - }, - { - "epoch": 0.6074168797953964, - "grad_norm": 2.756601689495799, - "learning_rate": 1.6322396932439805e-05, - "loss": 1.3254, - "step": 475 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 2.438373103889362, - "learning_rate": 1.630633842464909e-05, - "loss": 1.2562, - "step": 476 - }, - { - "epoch": 0.6099744245524297, - "grad_norm": 2.579497862098208, - "learning_rate": 1.6290252870692845e-05, - "loss": 1.3614, - "step": 477 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 1.831161058085749, - "learning_rate": 1.6274140339557626e-05, - "loss": 0.729, - "step": 478 - }, - { - "epoch": 0.612531969309463, - "grad_norm": 2.6881933601149526, - "learning_rate": 1.625800090034568e-05, - "loss": 1.3237, - "step": 479 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 2.6985378505016153, - "learning_rate": 1.6241834622274663e-05, - "loss": 1.3936, - "step": 480 - }, - { - "epoch": 0.6150895140664961, - "grad_norm": 2.64156396156768, - "learning_rate": 1.6225641574677335e-05, - "loss": 1.3423, - "step": 481 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 2.554347082724016, - "learning_rate": 1.6209421827001253e-05, - "loss": 1.3014, - "step": 482 - }, - { - "epoch": 0.6176470588235294, - "grad_norm": 1.8580432421605624, - "learning_rate": 1.6193175448808496e-05, - "loss": 0.7425, - "step": 483 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 2.508630698474796, - "learning_rate": 1.617690250977535e-05, - "loss": 1.3422, - "step": 484 - }, - { - "epoch": 0.6202046035805626, - "grad_norm": 2.5666626989336527, - "learning_rate": 1.616060307969201e-05, - "loss": 1.2765, - "step": 485 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 2.495207235307933, - "learning_rate": 1.614427722846229e-05, - "loss": 1.2742, - "step": 486 - }, - { - "epoch": 0.6227621483375959, - "grad_norm": 2.5722360280509275, - "learning_rate": 1.612792502610331e-05, - "loss": 1.3066, - "step": 487 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 2.543474908644559, - "learning_rate": 1.6111546542745203e-05, - "loss": 1.3294, - "step": 488 - }, - { - "epoch": 0.6253196930946292, - "grad_norm": 2.4927881477532425, - "learning_rate": 1.609514184863082e-05, - "loss": 1.3122, - "step": 489 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 2.779097447690414, - "learning_rate": 1.6078711014115427e-05, - "loss": 1.4348, - "step": 490 - }, - { - "epoch": 0.6278772378516624, - "grad_norm": 2.767872009592664, - "learning_rate": 1.6062254109666383e-05, - "loss": 1.3536, - "step": 491 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 2.5188512231751243, - "learning_rate": 1.6045771205862863e-05, - "loss": 1.3469, - "step": 492 - }, - { - "epoch": 0.6304347826086957, - "grad_norm": 1.757729967017052, - "learning_rate": 1.6029262373395547e-05, - "loss": 0.7061, - "step": 493 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 2.663048593981195, - "learning_rate": 1.601272768306632e-05, - "loss": 1.3214, - "step": 494 - }, - { - "epoch": 0.6329923273657289, - "grad_norm": 2.742324608055352, - "learning_rate": 1.5996167205787953e-05, - "loss": 1.2362, - "step": 495 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 2.7965307429022253, - "learning_rate": 1.597958101258382e-05, - "loss": 1.3299, - "step": 496 - }, - { - "epoch": 0.6355498721227621, - "grad_norm": 2.4299236688246775, - "learning_rate": 1.5962969174587576e-05, - "loss": 1.2648, - "step": 497 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 2.626645089293061, - "learning_rate": 1.594633176304287e-05, - "loss": 1.3886, - "step": 498 - }, - { - "epoch": 0.6381074168797954, - "grad_norm": 2.7540335613250373, - "learning_rate": 1.5929668849303013e-05, - "loss": 1.3148, - "step": 499 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 2.7434120145969763, - "learning_rate": 1.591298050483071e-05, - "loss": 1.3523, - "step": 500 - }, - { - "epoch": 0.6406649616368286, - "grad_norm": 2.5488500515156396, - "learning_rate": 1.5896266801197703e-05, - "loss": 1.3518, - "step": 501 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 2.549646318841425, - "learning_rate": 1.5879527810084523e-05, - "loss": 1.2914, - "step": 502 - }, - { - "epoch": 0.6432225063938619, - "grad_norm": 2.896148797942254, - "learning_rate": 1.586276360328013e-05, - "loss": 1.2907, - "step": 503 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 1.920369988655418, - "learning_rate": 1.584597425268164e-05, - "loss": 0.7464, - "step": 504 - }, - { - "epoch": 0.6457800511508951, - "grad_norm": 2.735309530169063, - "learning_rate": 1.5829159830293993e-05, - "loss": 1.3009, - "step": 505 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 2.735043046100987, - "learning_rate": 1.581232040822966e-05, - "loss": 1.3569, - "step": 506 - }, - { - "epoch": 0.6483375959079284, - "grad_norm": 2.805753648141297, - "learning_rate": 1.5795456058708336e-05, - "loss": 1.3282, - "step": 507 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 2.786850238236333, - "learning_rate": 1.5778566854056614e-05, - "loss": 1.2962, - "step": 508 - }, - { - "epoch": 0.6508951406649617, - "grad_norm": 1.7042186890743387, - "learning_rate": 1.5761652866707683e-05, - "loss": 0.7254, - "step": 509 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 2.5963157111572253, - "learning_rate": 1.574471416920102e-05, - "loss": 1.2986, - "step": 510 - }, - { - "epoch": 0.6534526854219949, - "grad_norm": 2.829657711693454, - "learning_rate": 1.572775083418209e-05, - "loss": 1.3048, - "step": 511 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 2.901726873478403, - "learning_rate": 1.5710762934402002e-05, - "loss": 1.2862, - "step": 512 - }, - { - "epoch": 0.6560102301790282, - "grad_norm": 2.9093116842206914, - "learning_rate": 1.5693750542717223e-05, - "loss": 1.2729, - "step": 513 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 2.504089441131774, - "learning_rate": 1.567671373208927e-05, - "loss": 1.2842, - "step": 514 - }, - { - "epoch": 0.6585677749360613, - "grad_norm": 2.6058666420172134, - "learning_rate": 1.5659652575584367e-05, - "loss": 1.3647, - "step": 515 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 1.8394207392611448, - "learning_rate": 1.5642567146373163e-05, - "loss": 0.7152, - "step": 516 - }, - { - "epoch": 0.6611253196930946, - "grad_norm": 2.717529054180534, - "learning_rate": 1.562545751773041e-05, - "loss": 1.3628, - "step": 517 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 2.7002524881089305, - "learning_rate": 1.5608323763034632e-05, - "loss": 1.3092, - "step": 518 - }, - { - "epoch": 0.6636828644501279, - "grad_norm": 2.901014320722085, - "learning_rate": 1.559116595576784e-05, - "loss": 1.2904, - "step": 519 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 1.7161488507609708, - "learning_rate": 1.5573984169515176e-05, - "loss": 0.7172, - "step": 520 - }, - { - "epoch": 0.6662404092071611, - "grad_norm": 1.6457825340884016, - "learning_rate": 1.5556778477964646e-05, - "loss": 0.7181, - "step": 521 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 1.7434058188805688, - "learning_rate": 1.5539548954906764e-05, - "loss": 0.7105, - "step": 522 - }, - { - "epoch": 0.6687979539641944, - "grad_norm": 1.6083489739856478, - "learning_rate": 1.5522295674234254e-05, - "loss": 0.6874, - "step": 523 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 2.7180545644410445, - "learning_rate": 1.5505018709941736e-05, - "loss": 1.3054, - "step": 524 - }, - { - "epoch": 0.6713554987212276, - "grad_norm": 1.9764720036766268, - "learning_rate": 1.548771813612539e-05, - "loss": 0.7246, - "step": 525 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 1.7676123593185245, - "learning_rate": 1.5470394026982665e-05, - "loss": 0.7514, - "step": 526 - }, - { - "epoch": 0.6739130434782609, - "grad_norm": 2.6727780223011917, - "learning_rate": 1.545304645681194e-05, - "loss": 1.3563, - "step": 527 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 2.78013998044077, - "learning_rate": 1.5435675500012212e-05, - "loss": 1.3167, - "step": 528 - }, - { - "epoch": 0.6764705882352942, - "grad_norm": 2.655395362823017, - "learning_rate": 1.5418281231082778e-05, - "loss": 1.3568, - "step": 529 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 2.642287077228652, - "learning_rate": 1.5400863724622906e-05, - "loss": 1.3437, - "step": 530 - }, - { - "epoch": 0.6790281329923273, - "grad_norm": 2.604811612326981, - "learning_rate": 1.5383423055331538e-05, - "loss": 1.3289, - "step": 531 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 2.680201827359244, - "learning_rate": 1.536595929800694e-05, - "loss": 1.3446, - "step": 532 - }, - { - "epoch": 0.6815856777493606, - "grad_norm": 1.7759032556617833, - "learning_rate": 1.5348472527546418e-05, - "loss": 0.6521, - "step": 533 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 2.6476030999419726, - "learning_rate": 1.533096281894595e-05, - "loss": 1.3217, - "step": 534 - }, - { - "epoch": 0.6841432225063938, - "grad_norm": 2.805778769196165, - "learning_rate": 1.5313430247299902e-05, - "loss": 1.3627, - "step": 535 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 2.6742998778559666, - "learning_rate": 1.5295874887800693e-05, - "loss": 1.2891, - "step": 536 - }, - { - "epoch": 0.6867007672634271, - "grad_norm": 2.692474376789777, - "learning_rate": 1.5278296815738476e-05, - "loss": 1.3622, - "step": 537 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 2.5070427875361005, - "learning_rate": 1.5260696106500807e-05, - "loss": 1.3031, - "step": 538 - }, - { - "epoch": 0.6892583120204604, - "grad_norm": 2.6163042847947424, - "learning_rate": 1.5243072835572319e-05, - "loss": 1.3324, - "step": 539 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 2.7061719750885347, - "learning_rate": 1.5225427078534422e-05, - "loss": 1.2866, - "step": 540 - }, - { - "epoch": 0.6918158567774936, - "grad_norm": 2.619430794081961, - "learning_rate": 1.5207758911064956e-05, - "loss": 1.2923, - "step": 541 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 2.4921966304859047, - "learning_rate": 1.5190068408937867e-05, - "loss": 1.2701, - "step": 542 - }, - { - "epoch": 0.6943734015345269, - "grad_norm": 2.4428619280757644, - "learning_rate": 1.5172355648022898e-05, - "loss": 1.2581, - "step": 543 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 2.6852358910967076, - "learning_rate": 1.5154620704285253e-05, - "loss": 1.3501, - "step": 544 - }, - { - "epoch": 0.69693094629156, - "grad_norm": 2.46224638635215, - "learning_rate": 1.5136863653785258e-05, - "loss": 0.7578, - "step": 545 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 2.875035439394322, - "learning_rate": 1.5119084572678073e-05, - "loss": 1.3122, - "step": 546 - }, - { - "epoch": 0.6994884910485933, - "grad_norm": 2.7444514614342563, - "learning_rate": 1.5101283537213316e-05, - "loss": 1.2981, - "step": 547 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 2.6557224879952592, - "learning_rate": 1.5083460623734775e-05, - "loss": 1.3029, - "step": 548 - }, - { - "epoch": 0.7020460358056266, - "grad_norm": 2.6669013843596465, - "learning_rate": 1.5065615908680076e-05, - "loss": 1.3685, - "step": 549 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 2.6866831564073124, - "learning_rate": 1.5047749468580325e-05, - "loss": 1.365, - "step": 550 - }, - { - "epoch": 0.7046035805626598, - "grad_norm": 2.7418529286311824, - "learning_rate": 1.5029861380059806e-05, - "loss": 1.2951, - "step": 551 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 1.8724683772947985, - "learning_rate": 1.5011951719835665e-05, - "loss": 0.7271, - "step": 552 - }, - { - "epoch": 0.7071611253196931, - "grad_norm": 2.7123159758864115, - "learning_rate": 1.4994020564717542e-05, - "loss": 1.2826, - "step": 553 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 2.831189803586677, - "learning_rate": 1.497606799160727e-05, - "loss": 1.2952, - "step": 554 - }, - { - "epoch": 0.7097186700767263, - "grad_norm": 2.6113964777822676, - "learning_rate": 1.4958094077498545e-05, - "loss": 1.3035, - "step": 555 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 2.67017777031599, - "learning_rate": 1.4940098899476576e-05, - "loss": 1.3109, - "step": 556 - }, - { - "epoch": 0.7122762148337596, - "grad_norm": 2.894270629425004, - "learning_rate": 1.4922082534717776e-05, - "loss": 1.2874, - "step": 557 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 3.2424086663049545, - "learning_rate": 1.4904045060489421e-05, - "loss": 1.4131, - "step": 558 - }, - { - "epoch": 0.7148337595907929, - "grad_norm": 2.534247554048346, - "learning_rate": 1.4885986554149316e-05, - "loss": 1.3212, - "step": 559 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 2.7008774377289124, - "learning_rate": 1.4867907093145473e-05, - "loss": 1.283, - "step": 560 - }, - { - "epoch": 0.717391304347826, - "grad_norm": 1.6338979071304691, - "learning_rate": 1.4849806755015765e-05, - "loss": 0.7183, - "step": 561 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 2.982603947070419, - "learning_rate": 1.4831685617387607e-05, - "loss": 1.2987, - "step": 562 - }, - { - "epoch": 0.7199488491048593, - "grad_norm": 2.998698025870041, - "learning_rate": 1.481354375797762e-05, - "loss": 1.2561, - "step": 563 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 2.475531302542339, - "learning_rate": 1.4795381254591287e-05, - "loss": 1.3179, - "step": 564 - }, - { - "epoch": 0.7225063938618926, - "grad_norm": 2.6332690848742875, - "learning_rate": 1.477719818512263e-05, - "loss": 1.2882, - "step": 565 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 1.7133214725132566, - "learning_rate": 1.475899462755388e-05, - "loss": 0.7256, - "step": 566 - }, - { - "epoch": 0.7250639386189258, - "grad_norm": 2.7195223072929258, - "learning_rate": 1.4740770659955125e-05, - "loss": 1.3081, - "step": 567 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 2.640846398388388, - "learning_rate": 1.4722526360483995e-05, - "loss": 1.3325, - "step": 568 - }, - { - "epoch": 0.7276214833759591, - "grad_norm": 2.7704787142128127, - "learning_rate": 1.4704261807385314e-05, - "loss": 1.3474, - "step": 569 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 2.8300153195088784, - "learning_rate": 1.4685977078990767e-05, - "loss": 1.3545, - "step": 570 - }, - { - "epoch": 0.7301790281329923, - "grad_norm": 1.5896962786351458, - "learning_rate": 1.4667672253718572e-05, - "loss": 0.6819, - "step": 571 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 2.721404066173243, - "learning_rate": 1.4649347410073126e-05, - "loss": 1.369, - "step": 572 - }, - { - "epoch": 0.7327365728900256, - "grad_norm": 2.6244616890120125, - "learning_rate": 1.463100262664469e-05, - "loss": 1.2787, - "step": 573 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 2.723959690302169, - "learning_rate": 1.4612637982109035e-05, - "loss": 1.3369, - "step": 574 - }, - { - "epoch": 0.7352941176470589, - "grad_norm": 2.5668040473548843, - "learning_rate": 1.4594253555227112e-05, - "loss": 1.3426, - "step": 575 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 2.657067350163712, - "learning_rate": 1.4575849424844717e-05, - "loss": 1.3373, - "step": 576 - }, - { - "epoch": 0.7378516624040921, - "grad_norm": 2.793544539650444, - "learning_rate": 1.455742566989214e-05, - "loss": 1.3365, - "step": 577 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 1.9891572484568796, - "learning_rate": 1.4538982369383846e-05, - "loss": 0.6809, - "step": 578 - }, - { - "epoch": 0.7404092071611253, - "grad_norm": 2.6158871535991524, - "learning_rate": 1.4520519602418122e-05, - "loss": 1.3465, - "step": 579 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 2.754882137063386, - "learning_rate": 1.4502037448176734e-05, - "loss": 1.4015, - "step": 580 - }, - { - "epoch": 0.7429667519181585, - "grad_norm": 2.576240909495278, - "learning_rate": 1.4483535985924606e-05, - "loss": 1.2663, - "step": 581 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 2.6057852944309268, - "learning_rate": 1.4465015295009464e-05, - "loss": 1.3318, - "step": 582 - }, - { - "epoch": 0.7455242966751918, - "grad_norm": 2.656727753686017, - "learning_rate": 1.444647545486149e-05, - "loss": 1.3171, - "step": 583 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 2.641635902171619, - "learning_rate": 1.4427916544993014e-05, - "loss": 1.3461, - "step": 584 - }, - { - "epoch": 0.7480818414322251, - "grad_norm": 2.492397682868346, - "learning_rate": 1.4409338644998139e-05, - "loss": 1.3238, - "step": 585 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 1.7759173616524304, - "learning_rate": 1.43907418345524e-05, - "loss": 0.7133, - "step": 586 - }, - { - "epoch": 0.7506393861892583, - "grad_norm": 2.850571191447556, - "learning_rate": 1.4372126193412452e-05, - "loss": 1.3139, - "step": 587 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 2.4936966656824104, - "learning_rate": 1.4353491801415706e-05, - "loss": 1.3102, - "step": 588 - }, - { - "epoch": 0.7531969309462916, - "grad_norm": 2.4122323752556585, - "learning_rate": 1.4334838738479979e-05, - "loss": 1.2667, - "step": 589 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 2.0824526733338042, - "learning_rate": 1.4316167084603177e-05, - "loss": 0.7363, - "step": 590 - }, - { - "epoch": 0.7557544757033248, - "grad_norm": 2.7772758958789807, - "learning_rate": 1.4297476919862931e-05, - "loss": 1.309, - "step": 591 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 2.818573392747027, - "learning_rate": 1.4278768324416252e-05, - "loss": 1.2501, - "step": 592 - }, - { - "epoch": 0.7583120204603581, - "grad_norm": 2.6663714490033525, - "learning_rate": 1.4260041378499215e-05, - "loss": 1.3448, - "step": 593 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 2.6488530454076917, - "learning_rate": 1.4241296162426575e-05, - "loss": 1.3229, - "step": 594 - }, - { - "epoch": 0.7608695652173914, - "grad_norm": 2.6453937297942556, - "learning_rate": 1.4222532756591452e-05, - "loss": 1.334, - "step": 595 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 2.5913728332834953, - "learning_rate": 1.420375124146498e-05, - "loss": 1.2963, - "step": 596 - }, - { - "epoch": 0.7634271099744245, - "grad_norm": 1.9111466543789486, - "learning_rate": 1.4184951697595954e-05, - "loss": 0.6801, - "step": 597 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 1.7583958785610625, - "learning_rate": 1.4166134205610485e-05, - "loss": 0.7176, - "step": 598 - }, - { - "epoch": 0.7659846547314578, - "grad_norm": 1.8758248263314796, - "learning_rate": 1.4147298846211675e-05, - "loss": 0.7448, - "step": 599 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 1.7754054720214119, - "learning_rate": 1.412844570017923e-05, - "loss": 0.6973, - "step": 600 - }, - { - "epoch": 0.768542199488491, - "grad_norm": 2.9938013014852745, - "learning_rate": 1.410957484836916e-05, - "loss": 1.3169, - "step": 601 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 2.8360449765277593, - "learning_rate": 1.4090686371713403e-05, - "loss": 1.3018, - "step": 602 - }, - { - "epoch": 0.7710997442455243, - "grad_norm": 2.646561819778331, - "learning_rate": 1.4071780351219476e-05, - "loss": 1.3464, - "step": 603 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 2.879622388590208, - "learning_rate": 1.405285686797015e-05, - "loss": 1.3248, - "step": 604 - }, - { - "epoch": 0.7736572890025576, - "grad_norm": 3.006960705856728, - "learning_rate": 1.403391600312308e-05, - "loss": 1.3876, - "step": 605 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 2.6718111641567974, - "learning_rate": 1.4014957837910475e-05, - "loss": 1.3989, - "step": 606 - }, - { - "epoch": 0.7762148337595908, - "grad_norm": 2.5720544846496387, - "learning_rate": 1.3995982453638732e-05, - "loss": 1.3185, - "step": 607 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 2.016635592268275, - "learning_rate": 1.3976989931688097e-05, - "loss": 0.8149, - "step": 608 - }, - { - "epoch": 0.7787723785166241, - "grad_norm": 2.827133354864102, - "learning_rate": 1.3957980353512317e-05, - "loss": 1.2414, - "step": 609 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 1.7393203702823359, - "learning_rate": 1.3938953800638293e-05, - "loss": 0.7741, - "step": 610 - }, - { - "epoch": 0.7813299232736572, - "grad_norm": 2.877855809664499, - "learning_rate": 1.3919910354665715e-05, - "loss": 1.2907, - "step": 611 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 2.7979054960025613, - "learning_rate": 1.3900850097266734e-05, - "loss": 1.3341, - "step": 612 - }, - { - "epoch": 0.7838874680306905, - "grad_norm": 2.4469874253186434, - "learning_rate": 1.388177311018559e-05, - "loss": 1.2726, - "step": 613 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 2.23571550975296, - "learning_rate": 1.3862679475238284e-05, - "loss": 0.7537, - "step": 614 - }, - { - "epoch": 0.7864450127877238, - "grad_norm": 2.949151230519571, - "learning_rate": 1.3843569274312204e-05, - "loss": 1.2984, - "step": 615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 2.898267585313443, - "learning_rate": 1.3824442589365788e-05, - "loss": 1.3328, - "step": 616 - }, - { - "epoch": 0.789002557544757, - "grad_norm": 2.4346420300953326, - "learning_rate": 1.3805299502428176e-05, - "loss": 1.2903, - "step": 617 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 3.903152290493814, - "learning_rate": 1.3786140095598845e-05, - "loss": 1.2916, - "step": 618 - }, - { - "epoch": 0.7915601023017903, - "grad_norm": 3.019559580520114, - "learning_rate": 1.3766964451047267e-05, - "loss": 1.3258, - "step": 619 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 2.80777075522026, - "learning_rate": 1.3747772651012548e-05, - "loss": 1.3219, - "step": 620 - }, - { - "epoch": 0.7941176470588235, - "grad_norm": 1.9391367813856175, - "learning_rate": 1.3728564777803089e-05, - "loss": 0.6864, - "step": 621 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 1.7584987318338607, - "learning_rate": 1.3709340913796213e-05, - "loss": 0.7859, - "step": 622 - }, - { - "epoch": 0.7966751918158568, - "grad_norm": 3.1673728850982603, - "learning_rate": 1.3690101141437835e-05, - "loss": 1.3146, - "step": 623 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 3.047515269970468, - "learning_rate": 1.3670845543242088e-05, - "loss": 1.2969, - "step": 624 - }, - { - "epoch": 0.7992327365728901, - "grad_norm": 1.770328789042984, - "learning_rate": 1.3651574201790985e-05, - "loss": 0.709, - "step": 625 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 2.789630633040832, - "learning_rate": 1.363228719973405e-05, - "loss": 1.2587, - "step": 626 - }, - { - "epoch": 0.8017902813299232, - "grad_norm": 1.7132635378602115, - "learning_rate": 1.3612984619787973e-05, - "loss": 0.6853, - "step": 627 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 1.8542438385001776, - "learning_rate": 1.3593666544736259e-05, - "loss": 0.7223, - "step": 628 - }, - { - "epoch": 0.8043478260869565, - "grad_norm": 3.0702680489278698, - "learning_rate": 1.3574333057428863e-05, - "loss": 1.3616, - "step": 629 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 2.5938888393536668, - "learning_rate": 1.3554984240781834e-05, - "loss": 1.316, - "step": 630 - }, - { - "epoch": 0.8069053708439897, - "grad_norm": 2.743856361556238, - "learning_rate": 1.3535620177776973e-05, - "loss": 1.3413, - "step": 631 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 2.7434790439285197, - "learning_rate": 1.351624095146147e-05, - "loss": 1.2925, - "step": 632 - }, - { - "epoch": 0.809462915601023, - "grad_norm": 1.9289042524449898, - "learning_rate": 1.349684664494753e-05, - "loss": 0.7365, - "step": 633 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 1.8011206400964195, - "learning_rate": 1.3477437341412054e-05, - "loss": 0.7012, - "step": 634 - }, - { - "epoch": 0.8120204603580563, - "grad_norm": 2.726492088799429, - "learning_rate": 1.3458013124096247e-05, - "loss": 1.369, - "step": 635 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 2.6976555015058437, - "learning_rate": 1.3438574076305277e-05, - "loss": 1.3, - "step": 636 - }, - { - "epoch": 0.8145780051150895, - "grad_norm": 2.872184925843084, - "learning_rate": 1.3419120281407926e-05, - "loss": 1.3311, - "step": 637 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 1.724076240542108, - "learning_rate": 1.3399651822836207e-05, - "loss": 0.7545, - "step": 638 - }, - { - "epoch": 0.8171355498721228, - "grad_norm": 2.5306585909082187, - "learning_rate": 1.3380168784085028e-05, - "loss": 1.3081, - "step": 639 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 1.7740719062756078, - "learning_rate": 1.3360671248711836e-05, - "loss": 0.7488, - "step": 640 - }, - { - "epoch": 0.819693094629156, - "grad_norm": 2.7915417532122064, - "learning_rate": 1.334115930033624e-05, - "loss": 1.3255, - "step": 641 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 2.672550535249737, - "learning_rate": 1.3321633022639657e-05, - "loss": 1.2734, - "step": 642 - }, - { - "epoch": 0.8222506393861893, - "grad_norm": 2.719505178672469, - "learning_rate": 1.330209249936498e-05, - "loss": 1.3108, - "step": 643 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 2.5868982742656232, - "learning_rate": 1.3282537814316174e-05, - "loss": 1.3228, - "step": 644 - }, - { - "epoch": 0.8248081841432225, - "grad_norm": 2.76681996929055, - "learning_rate": 1.326296905135795e-05, - "loss": 1.3116, - "step": 645 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 2.8995833837814855, - "learning_rate": 1.3243386294415404e-05, - "loss": 1.3042, - "step": 646 - }, - { - "epoch": 0.8273657289002557, - "grad_norm": 1.8713471991809185, - "learning_rate": 1.3223789627473629e-05, - "loss": 0.7245, - "step": 647 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 2.5442176939274, - "learning_rate": 1.3204179134577391e-05, - "loss": 1.3214, - "step": 648 - }, - { - "epoch": 0.829923273657289, - "grad_norm": 2.5067614416561215, - "learning_rate": 1.3184554899830744e-05, - "loss": 1.3077, - "step": 649 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 2.496520134832437, - "learning_rate": 1.3164917007396674e-05, - "loss": 1.2828, - "step": 650 - }, - { - "epoch": 0.8324808184143222, - "grad_norm": 1.7624708835085063, - "learning_rate": 1.3145265541496757e-05, - "loss": 0.7369, - "step": 651 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 2.5479616461999703, - "learning_rate": 1.312560058641076e-05, - "loss": 1.3208, - "step": 652 - }, - { - "epoch": 0.8350383631713555, - "grad_norm": 2.6637481260324787, - "learning_rate": 1.3105922226476313e-05, - "loss": 1.262, - "step": 653 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 2.652026177465146, - "learning_rate": 1.3086230546088544e-05, - "loss": 1.3127, - "step": 654 - }, - { - "epoch": 0.8375959079283888, - "grad_norm": 2.6176162979941378, - "learning_rate": 1.306652562969969e-05, - "loss": 1.316, - "step": 655 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 2.5423737704571328, - "learning_rate": 1.3046807561818762e-05, - "loss": 1.2814, - "step": 656 - }, - { - "epoch": 0.840153452685422, - "grad_norm": 2.532915698392224, - "learning_rate": 1.3027076427011185e-05, - "loss": 1.2635, - "step": 657 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 1.7222931356504905, - "learning_rate": 1.3007332309898406e-05, - "loss": 0.7049, - "step": 658 - }, - { - "epoch": 0.8427109974424553, - "grad_norm": 2.660077416577632, - "learning_rate": 1.2987575295157562e-05, - "loss": 1.3312, - "step": 659 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 2.6320596447814597, - "learning_rate": 1.2967805467521099e-05, - "loss": 1.3246, - "step": 660 - }, - { - "epoch": 0.8452685421994884, - "grad_norm": 1.6680308719822095, - "learning_rate": 1.294802291177642e-05, - "loss": 0.6929, - "step": 661 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 2.5223076322345426, - "learning_rate": 1.2928227712765504e-05, - "loss": 1.2796, - "step": 662 - }, - { - "epoch": 0.8478260869565217, - "grad_norm": 1.6792265098784243, - "learning_rate": 1.2908419955384569e-05, - "loss": 0.7458, - "step": 663 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 2.8180703885089, - "learning_rate": 1.2888599724583677e-05, - "loss": 1.3061, - "step": 664 - }, - { - "epoch": 0.850383631713555, - "grad_norm": 2.398089122528188, - "learning_rate": 1.2868767105366395e-05, - "loss": 1.2686, - "step": 665 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 2.500329575675278, - "learning_rate": 1.2848922182789419e-05, - "loss": 1.3042, - "step": 666 - }, - { - "epoch": 0.8529411764705882, - "grad_norm": 1.8182024774602572, - "learning_rate": 1.2829065041962207e-05, - "loss": 0.6902, - "step": 667 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 2.486814365873547, - "learning_rate": 1.2809195768046622e-05, - "loss": 1.3273, - "step": 668 - }, - { - "epoch": 0.8554987212276215, - "grad_norm": 2.41874207171455, - "learning_rate": 1.2789314446256562e-05, - "loss": 1.2912, - "step": 669 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 2.665774261772443, - "learning_rate": 1.2769421161857589e-05, - "loss": 1.2981, - "step": 670 - }, - { - "epoch": 0.8580562659846548, - "grad_norm": 1.7592127366392296, - "learning_rate": 1.274951600016658e-05, - "loss": 0.7183, - "step": 671 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 2.6546991444379375, - "learning_rate": 1.272959904655134e-05, - "loss": 1.3416, - "step": 672 - }, - { - "epoch": 0.860613810741688, - "grad_norm": 1.6744343760605678, - "learning_rate": 1.2709670386430253e-05, - "loss": 0.7385, - "step": 673 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 2.493752831120646, - "learning_rate": 1.2689730105271906e-05, - "loss": 1.2962, - "step": 674 - }, - { - "epoch": 0.8631713554987213, - "grad_norm": 1.6893161639581225, - "learning_rate": 1.2669778288594727e-05, - "loss": 0.7253, - "step": 675 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 2.5354329013177974, - "learning_rate": 1.264981502196662e-05, - "loss": 1.1826, - "step": 676 - }, - { - "epoch": 0.8657289002557544, - "grad_norm": 2.4094654459309983, - "learning_rate": 1.2629840391004583e-05, - "loss": 1.2683, - "step": 677 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 2.5828802595178826, - "learning_rate": 1.2609854481374364e-05, - "loss": 1.3514, - "step": 678 - }, - { - "epoch": 0.8682864450127877, - "grad_norm": 2.408398304291094, - "learning_rate": 1.2589857378790083e-05, - "loss": 1.2569, - "step": 679 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 2.567904688192825, - "learning_rate": 1.2569849169013851e-05, - "loss": 1.298, - "step": 680 - }, - { - "epoch": 0.870843989769821, - "grad_norm": 2.5369153194493617, - "learning_rate": 1.2549829937855427e-05, - "loss": 1.3086, - "step": 681 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 1.6804550435164007, - "learning_rate": 1.2529799771171835e-05, - "loss": 0.6523, - "step": 682 - }, - { - "epoch": 0.8734015345268542, - "grad_norm": 2.5544309643333913, - "learning_rate": 1.2509758754866994e-05, - "loss": 1.3177, - "step": 683 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 2.5357932014675493, - "learning_rate": 1.2489706974891361e-05, - "loss": 1.2788, - "step": 684 - }, - { - "epoch": 0.8759590792838875, - "grad_norm": 2.4228303491073686, - "learning_rate": 1.2469644517241544e-05, - "loss": 1.3241, - "step": 685 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 1.7348902354225548, - "learning_rate": 1.2449571467959958e-05, - "loss": 0.7467, - "step": 686 - }, - { - "epoch": 0.8785166240409207, - "grad_norm": 2.5266964036161856, - "learning_rate": 1.2429487913134438e-05, - "loss": 1.2588, - "step": 687 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 2.5883381657531364, - "learning_rate": 1.2409393938897868e-05, - "loss": 1.3083, - "step": 688 - }, - { - "epoch": 0.881074168797954, - "grad_norm": 1.8052853051480389, - "learning_rate": 1.2389289631427824e-05, - "loss": 0.7038, - "step": 689 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 2.606591190903682, - "learning_rate": 1.2369175076946203e-05, - "loss": 1.2702, - "step": 690 - }, - { - "epoch": 0.8836317135549873, - "grad_norm": 2.5123362778031773, - "learning_rate": 1.2349050361718837e-05, - "loss": 1.2721, - "step": 691 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 2.4583971412434362, - "learning_rate": 1.232891557205514e-05, - "loss": 1.3238, - "step": 692 - }, - { - "epoch": 0.8861892583120204, - "grad_norm": 2.385852872757499, - "learning_rate": 1.2308770794307743e-05, - "loss": 1.3234, - "step": 693 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 2.638998104382766, - "learning_rate": 1.2288616114872091e-05, - "loss": 1.338, - "step": 694 - }, - { - "epoch": 0.8887468030690537, - "grad_norm": 2.43902975088406, - "learning_rate": 1.226845162018612e-05, - "loss": 1.3028, - "step": 695 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 2.36640245329089, - "learning_rate": 1.2248277396729836e-05, - "loss": 1.2419, - "step": 696 - }, - { - "epoch": 0.8913043478260869, - "grad_norm": 2.5037619447882444, - "learning_rate": 1.2228093531024985e-05, - "loss": 1.321, - "step": 697 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 2.691547267121765, - "learning_rate": 1.220790010963467e-05, - "loss": 1.3372, - "step": 698 - }, - { - "epoch": 0.8938618925831202, - "grad_norm": 1.904639425615429, - "learning_rate": 1.2187697219162957e-05, - "loss": 0.7552, - "step": 699 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 1.7072674952351226, - "learning_rate": 1.2167484946254535e-05, - "loss": 0.7207, - "step": 700 - }, - { - "epoch": 0.8964194373401535, - "grad_norm": 2.7779579830679957, - "learning_rate": 1.2147263377594339e-05, - "loss": 1.3491, - "step": 701 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 2.465684369399297, - "learning_rate": 1.2127032599907151e-05, - "loss": 1.2516, - "step": 702 - }, - { - "epoch": 0.8989769820971867, - "grad_norm": 2.4852151638423208, - "learning_rate": 1.2106792699957264e-05, - "loss": 1.2957, - "step": 703 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 2.914241416164945, - "learning_rate": 1.2086543764548089e-05, - "loss": 1.2935, - "step": 704 - }, - { - "epoch": 0.90153452685422, - "grad_norm": 2.3640837103323684, - "learning_rate": 1.2066285880521785e-05, - "loss": 1.2528, - "step": 705 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 2.3726787699923237, - "learning_rate": 1.2046019134758893e-05, - "loss": 1.2775, - "step": 706 - }, - { - "epoch": 0.9040920716112532, - "grad_norm": 1.7528491428781292, - "learning_rate": 1.2025743614177956e-05, - "loss": 0.6771, - "step": 707 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 2.616263196891728, - "learning_rate": 1.2005459405735157e-05, - "loss": 1.296, - "step": 708 - }, - { - "epoch": 0.9066496163682864, - "grad_norm": 1.6415694230237738, - "learning_rate": 1.1985166596423925e-05, - "loss": 0.6528, - "step": 709 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 2.497279798888619, - "learning_rate": 1.1964865273274593e-05, - "loss": 1.2906, - "step": 710 - }, - { - "epoch": 0.9092071611253197, - "grad_norm": 2.6005927216919367, - "learning_rate": 1.1944555523353995e-05, - "loss": 1.3234, - "step": 711 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 2.5243709590560117, - "learning_rate": 1.1924237433765111e-05, - "loss": 1.272, - "step": 712 - }, - { - "epoch": 0.9117647058823529, - "grad_norm": 1.8715665418348078, - "learning_rate": 1.1903911091646684e-05, - "loss": 0.6874, - "step": 713 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 1.743516731392254, - "learning_rate": 1.1883576584172854e-05, - "loss": 0.7736, - "step": 714 - }, - { - "epoch": 0.9143222506393862, - "grad_norm": 2.535325037183684, - "learning_rate": 1.1863233998552775e-05, - "loss": 1.2812, - "step": 715 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 2.5207325054393874, - "learning_rate": 1.184288342203025e-05, - "loss": 1.2857, - "step": 716 - }, - { - "epoch": 0.9168797953964194, - "grad_norm": 1.908124630110519, - "learning_rate": 1.1822524941883349e-05, - "loss": 0.7356, - "step": 717 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 2.399443555952961, - "learning_rate": 1.1802158645424044e-05, - "loss": 1.244, - "step": 718 - }, - { - "epoch": 0.9194373401534527, - "grad_norm": 1.816127005818875, - "learning_rate": 1.1781784619997825e-05, - "loss": 0.7305, - "step": 719 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 2.779794926457551, - "learning_rate": 1.176140295298333e-05, - "loss": 1.3065, - "step": 720 - }, - { - "epoch": 0.921994884910486, - "grad_norm": 1.6906763233710227, - "learning_rate": 1.1741013731791969e-05, - "loss": 0.6976, - "step": 721 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 2.600271503335399, - "learning_rate": 1.1720617043867552e-05, - "loss": 1.3679, - "step": 722 - }, - { - "epoch": 0.9245524296675192, - "grad_norm": 2.650993533630177, - "learning_rate": 1.1700212976685912e-05, - "loss": 1.3531, - "step": 723 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 1.922911821265928, - "learning_rate": 1.1679801617754522e-05, - "loss": 0.7382, - "step": 724 - }, - { - "epoch": 0.9271099744245525, - "grad_norm": 2.8315057382130355, - "learning_rate": 1.1659383054612142e-05, - "loss": 1.3017, - "step": 725 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 1.598583975557809, - "learning_rate": 1.1638957374828417e-05, - "loss": 0.7178, - "step": 726 - }, - { - "epoch": 0.9296675191815856, - "grad_norm": 2.540989872054288, - "learning_rate": 1.1618524666003512e-05, - "loss": 1.2577, - "step": 727 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 2.5147053834634274, - "learning_rate": 1.1598085015767748e-05, - "loss": 1.3062, - "step": 728 - }, - { - "epoch": 0.9322250639386189, - "grad_norm": 2.5392761912209942, - "learning_rate": 1.1577638511781211e-05, - "loss": 1.2986, - "step": 729 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 2.8072040638914078, - "learning_rate": 1.1557185241733375e-05, - "loss": 1.2924, - "step": 730 - }, - { - "epoch": 0.9347826086956522, - "grad_norm": 1.9265349405988796, - "learning_rate": 1.1536725293342744e-05, - "loss": 0.7134, - "step": 731 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 2.600853936638501, - "learning_rate": 1.1516258754356447e-05, - "loss": 1.2779, - "step": 732 - }, - { - "epoch": 0.9373401534526854, - "grad_norm": 2.428884194110223, - "learning_rate": 1.1495785712549892e-05, - "loss": 1.2232, - "step": 733 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 2.634080892868991, - "learning_rate": 1.1475306255726377e-05, - "loss": 1.3118, - "step": 734 - }, - { - "epoch": 0.9398976982097187, - "grad_norm": 2.3378309168619835, - "learning_rate": 1.1454820471716701e-05, - "loss": 1.262, - "step": 735 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 1.6160497958571902, - "learning_rate": 1.1434328448378801e-05, - "loss": 0.6721, - "step": 736 - }, - { - "epoch": 0.9424552429667519, - "grad_norm": 2.4571118019459264, - "learning_rate": 1.1413830273597388e-05, - "loss": 1.2653, - "step": 737 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 2.609716230159254, - "learning_rate": 1.1393326035283531e-05, - "loss": 1.2496, - "step": 738 - }, - { - "epoch": 0.9450127877237852, - "grad_norm": 2.5876180534130833, - "learning_rate": 1.1372815821374322e-05, - "loss": 1.2711, - "step": 739 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 1.7120764288285746, - "learning_rate": 1.1352299719832473e-05, - "loss": 0.7054, - "step": 740 - }, - { - "epoch": 0.9475703324808185, - "grad_norm": 2.52610006051332, - "learning_rate": 1.1331777818645947e-05, - "loss": 1.3211, - "step": 741 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 2.6097388579621392, - "learning_rate": 1.1311250205827584e-05, - "loss": 1.3145, - "step": 742 - }, - { - "epoch": 0.9501278772378516, - "grad_norm": 2.5458101982063597, - "learning_rate": 1.1290716969414714e-05, - "loss": 1.1986, - "step": 743 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 2.247449675498048, - "learning_rate": 1.1270178197468788e-05, - "loss": 1.2652, - "step": 744 - }, - { - "epoch": 0.9526854219948849, - "grad_norm": 2.533469375593158, - "learning_rate": 1.1249633978075e-05, - "loss": 1.2829, - "step": 745 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 2.7394161002923934, - "learning_rate": 1.1229084399341901e-05, - "loss": 1.3523, - "step": 746 - }, - { - "epoch": 0.9552429667519181, - "grad_norm": 1.6726227747589206, - "learning_rate": 1.1208529549401028e-05, - "loss": 0.6732, - "step": 747 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 2.398306151131996, - "learning_rate": 1.1187969516406534e-05, - "loss": 1.2967, - "step": 748 - }, - { - "epoch": 0.9578005115089514, - "grad_norm": 1.8850321385266902, - "learning_rate": 1.1167404388534784e-05, - "loss": 0.75, - "step": 749 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 1.8470481457296177, - "learning_rate": 1.1146834253984008e-05, - "loss": 0.7652, - "step": 750 - }, - { - "epoch": 0.9603580562659847, - "grad_norm": 2.8815470854187963, - "learning_rate": 1.11262592009739e-05, - "loss": 1.3177, - "step": 751 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 2.5007442117497627, - "learning_rate": 1.110567931774525e-05, - "loss": 1.3065, - "step": 752 - }, - { - "epoch": 0.9629156010230179, - "grad_norm": 2.6143510685568714, - "learning_rate": 1.1085094692559568e-05, - "loss": 1.3393, - "step": 753 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 2.4271657585038886, - "learning_rate": 1.1064505413698693e-05, - "loss": 1.2078, - "step": 754 - }, - { - "epoch": 0.9654731457800512, - "grad_norm": 2.7619300083020946, - "learning_rate": 1.1043911569464431e-05, - "loss": 1.3088, - "step": 755 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 2.416193048979704, - "learning_rate": 1.1023313248178162e-05, - "loss": 1.3139, - "step": 756 - }, - { - "epoch": 0.9680306905370843, - "grad_norm": 2.671287410172729, - "learning_rate": 1.1002710538180468e-05, - "loss": 1.3475, - "step": 757 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 2.0381819142031845, - "learning_rate": 1.098210352783075e-05, - "loss": 0.7774, - "step": 758 - }, - { - "epoch": 0.9705882352941176, - "grad_norm": 2.5031141202281337, - "learning_rate": 1.0961492305506857e-05, - "loss": 1.2861, - "step": 759 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 2.673543001671898, - "learning_rate": 1.0940876959604703e-05, - "loss": 1.2743, - "step": 760 - }, - { - "epoch": 0.9731457800511509, - "grad_norm": 2.577729850642017, - "learning_rate": 1.0920257578537879e-05, - "loss": 1.2754, - "step": 761 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 2.4755532721304037, - "learning_rate": 1.089963425073729e-05, - "loss": 1.2779, - "step": 762 - }, - { - "epoch": 0.9757033248081841, - "grad_norm": 2.738518548937081, - "learning_rate": 1.0879007064650763e-05, - "loss": 1.3217, - "step": 763 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 1.6809830241184658, - "learning_rate": 1.0858376108742674e-05, - "loss": 0.7276, - "step": 764 - }, - { - "epoch": 0.9782608695652174, - "grad_norm": 2.4959975420539124, - "learning_rate": 1.0837741471493565e-05, - "loss": 1.2752, - "step": 765 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 2.5897933228537195, - "learning_rate": 1.0817103241399772e-05, - "loss": 1.2671, - "step": 766 - }, - { - "epoch": 0.9808184143222506, - "grad_norm": 1.8245010774177899, - "learning_rate": 1.0796461506973026e-05, - "loss": 0.7114, - "step": 767 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 2.7925828233734986, - "learning_rate": 1.0775816356740106e-05, - "loss": 1.3096, - "step": 768 - }, - { - "epoch": 0.9833759590792839, - "grad_norm": 2.4768958476449603, - "learning_rate": 1.075516787924242e-05, - "loss": 1.2843, - "step": 769 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 2.5663195958077467, - "learning_rate": 1.073451616303567e-05, - "loss": 1.2725, - "step": 770 - }, - { - "epoch": 0.9859335038363172, - "grad_norm": 2.9183296496264206, - "learning_rate": 1.071386129668942e-05, - "loss": 1.3451, - "step": 771 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 2.7569744320073646, - "learning_rate": 1.0693203368786767e-05, - "loss": 1.2728, - "step": 772 - }, - { - "epoch": 0.9884910485933504, - "grad_norm": 3.064509822639105, - "learning_rate": 1.0672542467923929e-05, - "loss": 1.2575, - "step": 773 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 2.5325537586930715, - "learning_rate": 1.0651878682709874e-05, - "loss": 1.3191, - "step": 774 - }, - { - "epoch": 0.9910485933503836, - "grad_norm": 2.5136966609320104, - "learning_rate": 1.0631212101765937e-05, - "loss": 1.2501, - "step": 775 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 1.9140658020318404, - "learning_rate": 1.0610542813725455e-05, - "loss": 0.7226, - "step": 776 - }, - { - "epoch": 0.9936061381074168, - "grad_norm": 2.656957401053827, - "learning_rate": 1.0589870907233357e-05, - "loss": 1.2741, - "step": 777 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 2.7205873375686145, - "learning_rate": 1.0569196470945824e-05, - "loss": 1.2944, - "step": 778 - }, - { - "epoch": 0.9961636828644501, - "grad_norm": 2.731013868838514, - "learning_rate": 1.0548519593529865e-05, - "loss": 1.2769, - "step": 779 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 2.2867535348453982, - "learning_rate": 1.052784036366297e-05, - "loss": 1.2728, - "step": 780 - }, - { - "epoch": 0.9987212276214834, - "grad_norm": 2.315879040536773, - "learning_rate": 1.0507158870032721e-05, - "loss": 0.9834, - "step": 781 - }, - { - "epoch": 1.0, - "grad_norm": 2.648341500139824, - "learning_rate": 1.0486475201336396e-05, - "loss": 1.255, - "step": 782 - }, - { - "epoch": 1.0012787723785166, - "grad_norm": 2.5771276530891067, - "learning_rate": 1.046578944628061e-05, - "loss": 0.4414, - "step": 783 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 3.39720958638403, - "learning_rate": 1.0445101693580932e-05, - "loss": 0.8525, - "step": 784 - }, - { - "epoch": 1.0038363171355498, - "grad_norm": 3.032831029232034, - "learning_rate": 1.0424412031961485e-05, - "loss": 0.8195, - "step": 785 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 2.659759484956199, - "learning_rate": 1.0403720550154584e-05, - "loss": 0.8332, - "step": 786 - }, - { - "epoch": 1.0063938618925832, - "grad_norm": 2.673213765105028, - "learning_rate": 1.0383027336900356e-05, - "loss": 0.8125, - "step": 787 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 2.7700645209870456, - "learning_rate": 1.0362332480946342e-05, - "loss": 0.8035, - "step": 788 - }, - { - "epoch": 1.0089514066496164, - "grad_norm": 3.68006597430297, - "learning_rate": 1.0341636071047143e-05, - "loss": 0.7684, - "step": 789 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 5.627208111286931, - "learning_rate": 1.032093819596401e-05, - "loss": 0.4995, - "step": 790 - }, - { - "epoch": 1.0115089514066495, - "grad_norm": 3.780981971092634, - "learning_rate": 1.0300238944464485e-05, - "loss": 0.7875, - "step": 791 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 3.2223954536171338, - "learning_rate": 1.0279538405322016e-05, - "loss": 0.7999, - "step": 792 - }, - { - "epoch": 1.014066496163683, - "grad_norm": 2.7367234295302185, - "learning_rate": 1.0258836667315566e-05, - "loss": 0.7659, - "step": 793 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 2.9242225243241173, - "learning_rate": 1.0238133819229241e-05, - "loss": 0.8224, - "step": 794 - }, - { - "epoch": 1.0166240409207161, - "grad_norm": 2.8824747739181125, - "learning_rate": 1.0217429949851921e-05, - "loss": 0.7447, - "step": 795 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 2.9796664435484104, - "learning_rate": 1.019672514797684e-05, - "loss": 0.7533, - "step": 796 - }, - { - "epoch": 1.0191815856777493, - "grad_norm": 2.5370868488855156, - "learning_rate": 1.0176019502401258e-05, - "loss": 0.3791, - "step": 797 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 3.4365360268990393, - "learning_rate": 1.0155313101926036e-05, - "loss": 0.7814, - "step": 798 - }, - { - "epoch": 1.0217391304347827, - "grad_norm": 2.1561212523307747, - "learning_rate": 1.0134606035355278e-05, - "loss": 0.4449, - "step": 799 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 2.01209922969684, - "learning_rate": 1.011389839149595e-05, - "loss": 0.4623, - "step": 800 - }, - { - "epoch": 1.0242966751918159, - "grad_norm": 3.0745296423296375, - "learning_rate": 1.0093190259157482e-05, - "loss": 0.7261, - "step": 801 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 1.8544555986596571, - "learning_rate": 1.0072481727151409e-05, - "loss": 0.4107, - "step": 802 - }, - { - "epoch": 1.026854219948849, - "grad_norm": 3.2446775348714048, - "learning_rate": 1.0051772884290978e-05, - "loss": 0.7848, - "step": 803 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 2.0092683960650777, - "learning_rate": 1.0031063819390766e-05, - "loss": 0.4217, - "step": 804 - }, - { - "epoch": 1.0294117647058822, - "grad_norm": 3.2314768869534722, - "learning_rate": 1.0010354621266304e-05, - "loss": 0.7759, - "step": 805 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 3.2592511744991244, - "learning_rate": 9.989645378733698e-06, - "loss": 0.7435, - "step": 806 - }, - { - "epoch": 1.0319693094629157, - "grad_norm": 3.1519711517098243, - "learning_rate": 9.968936180609234e-06, - "loss": 0.7701, - "step": 807 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 3.3566097434637547, - "learning_rate": 9.948227115709025e-06, - "loss": 0.7998, - "step": 808 - }, - { - "epoch": 1.0345268542199488, - "grad_norm": 2.850325615127575, - "learning_rate": 9.927518272848593e-06, - "loss": 0.7307, - "step": 809 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 2.9523378858832117, - "learning_rate": 9.906809740842518e-06, - "loss": 0.7762, - "step": 810 - }, - { - "epoch": 1.037084398976982, - "grad_norm": 2.983626034375549, - "learning_rate": 9.886101608504054e-06, - "loss": 0.7819, - "step": 811 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 1.9717932061774401, - "learning_rate": 9.865393964644724e-06, - "loss": 0.3913, - "step": 812 - }, - { - "epoch": 1.0396419437340154, - "grad_norm": 3.2244746533050885, - "learning_rate": 9.844686898073966e-06, - "loss": 0.7403, - "step": 813 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 2.8506566359355534, - "learning_rate": 9.823980497598746e-06, - "loss": 0.7896, - "step": 814 - }, - { - "epoch": 1.0421994884910486, - "grad_norm": 2.8642046292725105, - "learning_rate": 9.803274852023161e-06, - "loss": 0.6848, - "step": 815 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 2.8366105642568678, - "learning_rate": 9.78257005014808e-06, - "loss": 0.7799, - "step": 816 - }, - { - "epoch": 1.0447570332480818, - "grad_norm": 1.8983327272464434, - "learning_rate": 9.761866180770762e-06, - "loss": 0.3873, - "step": 817 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 3.163100348836036, - "learning_rate": 9.741163332684436e-06, - "loss": 0.733, - "step": 818 - }, - { - "epoch": 1.0473145780051152, - "grad_norm": 2.9032593197084733, - "learning_rate": 9.720461594677986e-06, - "loss": 0.7246, - "step": 819 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 2.800065506947021, - "learning_rate": 9.699761055535519e-06, - "loss": 0.7449, - "step": 820 - }, - { - "epoch": 1.0498721227621484, - "grad_norm": 3.0039216181715145, - "learning_rate": 9.679061804035993e-06, - "loss": 0.7906, - "step": 821 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 3.0840230897616596, - "learning_rate": 9.658363928952859e-06, - "loss": 0.7784, - "step": 822 - }, - { - "epoch": 1.0524296675191815, - "grad_norm": 2.9568271804391832, - "learning_rate": 9.637667519053661e-06, - "loss": 0.797, - "step": 823 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 1.9091701767269735, - "learning_rate": 9.616972663099648e-06, - "loss": 0.4183, - "step": 824 - }, - { - "epoch": 1.054987212276215, - "grad_norm": 2.7645776949588896, - "learning_rate": 9.596279449845416e-06, - "loss": 0.7292, - "step": 825 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 3.0952246981923586, - "learning_rate": 9.57558796803852e-06, - "loss": 0.7901, - "step": 826 - }, - { - "epoch": 1.0575447570332481, - "grad_norm": 2.7517337305672998, - "learning_rate": 9.55489830641907e-06, - "loss": 0.7304, - "step": 827 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 2.7835507990381925, - "learning_rate": 9.53421055371939e-06, - "loss": 0.6817, - "step": 828 - }, - { - "epoch": 1.0601023017902813, - "grad_norm": 3.0646585692721913, - "learning_rate": 9.513524798663609e-06, - "loss": 0.7113, - "step": 829 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 1.9150219376910338, - "learning_rate": 9.492841129967282e-06, - "loss": 0.4395, - "step": 830 - }, - { - "epoch": 1.0626598465473145, - "grad_norm": 1.9018618223319992, - "learning_rate": 9.472159636337032e-06, - "loss": 0.3919, - "step": 831 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 2.9204109940460223, - "learning_rate": 9.45148040647014e-06, - "loss": 0.737, - "step": 832 - }, - { - "epoch": 1.065217391304348, - "grad_norm": 2.9372157049105865, - "learning_rate": 9.43080352905418e-06, - "loss": 0.7683, - "step": 833 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 3.2099124853076137, - "learning_rate": 9.410129092766643e-06, - "loss": 0.7426, - "step": 834 - }, - { - "epoch": 1.067774936061381, - "grad_norm": 2.0398349653009764, - "learning_rate": 9.38945718627455e-06, - "loss": 0.4179, - "step": 835 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 2.885820074404598, - "learning_rate": 9.368787898234066e-06, - "loss": 0.8047, - "step": 836 - }, - { - "epoch": 1.0703324808184143, - "grad_norm": 2.83265550815321, - "learning_rate": 9.348121317290128e-06, - "loss": 0.7377, - "step": 837 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 3.174319539706804, - "learning_rate": 9.327457532076074e-06, - "loss": 0.7865, - "step": 838 - }, - { - "epoch": 1.0728900255754477, - "grad_norm": 2.637817492170552, - "learning_rate": 9.306796631213234e-06, - "loss": 0.7415, - "step": 839 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 1.6974120038485723, - "learning_rate": 9.286138703310582e-06, - "loss": 0.4216, - "step": 840 - }, - { - "epoch": 1.0754475703324808, - "grad_norm": 3.12572333068353, - "learning_rate": 9.265483836964336e-06, - "loss": 0.7573, - "step": 841 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 2.8567672349990603, - "learning_rate": 9.244832120757582e-06, - "loss": 0.8038, - "step": 842 - }, - { - "epoch": 1.078005115089514, - "grad_norm": 2.822281561552331, - "learning_rate": 9.224183643259896e-06, - "loss": 0.7222, - "step": 843 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 1.757080878371996, - "learning_rate": 9.203538493026975e-06, - "loss": 0.4209, - "step": 844 - }, - { - "epoch": 1.0805626598465472, - "grad_norm": 3.1092357561535113, - "learning_rate": 9.182896758600233e-06, - "loss": 0.7935, - "step": 845 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 2.839526405963611, - "learning_rate": 9.162258528506433e-06, - "loss": 0.688, - "step": 846 - }, - { - "epoch": 1.0831202046035806, - "grad_norm": 3.069712021339007, - "learning_rate": 9.141623891257327e-06, - "loss": 0.763, - "step": 847 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 3.0151063121780703, - "learning_rate": 9.120992935349238e-06, - "loss": 0.7787, - "step": 848 - }, - { - "epoch": 1.0856777493606138, - "grad_norm": 2.8669991710778033, - "learning_rate": 9.10036574926271e-06, - "loss": 0.811, - "step": 849 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 3.0164931515949456, - "learning_rate": 9.079742421462123e-06, - "loss": 0.7386, - "step": 850 - }, - { - "epoch": 1.088235294117647, - "grad_norm": 2.8158956685814283, - "learning_rate": 9.059123040395302e-06, - "loss": 0.7391, - "step": 851 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 2.8263817345008895, - "learning_rate": 9.038507694493143e-06, - "loss": 0.7616, - "step": 852 - }, - { - "epoch": 1.0907928388746804, - "grad_norm": 2.7873580062302676, - "learning_rate": 9.017896472169255e-06, - "loss": 0.6969, - "step": 853 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 1.861373196197555, - "learning_rate": 8.997289461819537e-06, - "loss": 0.4423, - "step": 854 - }, - { - "epoch": 1.0933503836317136, - "grad_norm": 2.669718898980522, - "learning_rate": 8.97668675182184e-06, - "loss": 0.7336, - "step": 855 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 2.834985716186227, - "learning_rate": 8.956088430535572e-06, - "loss": 0.748, - "step": 856 - }, - { - "epoch": 1.0959079283887467, - "grad_norm": 2.810757791512416, - "learning_rate": 8.935494586301308e-06, - "loss": 0.6895, - "step": 857 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 2.89554054281433, - "learning_rate": 8.914905307440436e-06, - "loss": 0.7624, - "step": 858 - }, - { - "epoch": 1.0984654731457801, - "grad_norm": 2.8125573851155057, - "learning_rate": 8.894320682254756e-06, - "loss": 0.7068, - "step": 859 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 3.3151031315737223, - "learning_rate": 8.873740799026105e-06, - "loss": 0.8343, - "step": 860 - }, - { - "epoch": 1.1010230179028133, - "grad_norm": 2.9446472367178855, - "learning_rate": 8.853165746015997e-06, - "loss": 0.7272, - "step": 861 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 1.8820311222218553, - "learning_rate": 8.832595611465221e-06, - "loss": 0.4421, - "step": 862 - }, - { - "epoch": 1.1035805626598465, - "grad_norm": 1.843388269823385, - "learning_rate": 8.81203048359347e-06, - "loss": 0.3886, - "step": 863 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 2.942874227762995, - "learning_rate": 8.791470450598971e-06, - "loss": 0.8079, - "step": 864 - }, - { - "epoch": 1.10613810741688, - "grad_norm": 3.1106640060958366, - "learning_rate": 8.770915600658104e-06, - "loss": 0.7174, - "step": 865 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 2.8452199260197295, - "learning_rate": 8.750366021925003e-06, - "loss": 0.7776, - "step": 866 - }, - { - "epoch": 1.108695652173913, - "grad_norm": 2.898211259165804, - "learning_rate": 8.729821802531213e-06, - "loss": 0.7146, - "step": 867 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 3.140597807951491, - "learning_rate": 8.70928303058529e-06, - "loss": 0.7979, - "step": 868 - }, - { - "epoch": 1.1112531969309463, - "grad_norm": 2.9306161475902988, - "learning_rate": 8.688749794172419e-06, - "loss": 0.7201, - "step": 869 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 3.217445768803425, - "learning_rate": 8.668222181354055e-06, - "loss": 0.7441, - "step": 870 - }, - { - "epoch": 1.1138107416879794, - "grad_norm": 3.1346543216703857, - "learning_rate": 8.647700280167532e-06, - "loss": 0.7897, - "step": 871 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 2.9967647903879455, - "learning_rate": 8.627184178625683e-06, - "loss": 0.7165, - "step": 872 - }, - { - "epoch": 1.1163682864450128, - "grad_norm": 1.900534107437102, - "learning_rate": 8.60667396471647e-06, - "loss": 0.4096, - "step": 873 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 3.11933452353444, - "learning_rate": 8.586169726402617e-06, - "loss": 0.7698, - "step": 874 - }, - { - "epoch": 1.118925831202046, - "grad_norm": 1.8312264764090378, - "learning_rate": 8.5656715516212e-06, - "loss": 0.4401, - "step": 875 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 2.873121835518463, - "learning_rate": 8.545179528283302e-06, - "loss": 0.7527, - "step": 876 - }, - { - "epoch": 1.1214833759590792, - "grad_norm": 3.0718049945176933, - "learning_rate": 8.524693744273628e-06, - "loss": 0.7045, - "step": 877 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 3.002984909838, - "learning_rate": 8.50421428745011e-06, - "loss": 0.794, - "step": 878 - }, - { - "epoch": 1.1240409207161126, - "grad_norm": 1.7127076517387805, - "learning_rate": 8.483741245643555e-06, - "loss": 0.3994, - "step": 879 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 1.616761663668132, - "learning_rate": 8.463274706657263e-06, - "loss": 0.4295, - "step": 880 - }, - { - "epoch": 1.1265984654731458, - "grad_norm": 2.96890992239883, - "learning_rate": 8.442814758266628e-06, - "loss": 0.7626, - "step": 881 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 2.7069106397427833, - "learning_rate": 8.42236148821879e-06, - "loss": 0.7336, - "step": 882 - }, - { - "epoch": 1.129156010230179, - "grad_norm": 2.783761021132523, - "learning_rate": 8.401914984232254e-06, - "loss": 0.7994, - "step": 883 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 2.785538777765882, - "learning_rate": 8.381475333996491e-06, - "loss": 0.7239, - "step": 884 - }, - { - "epoch": 1.1317135549872122, - "grad_norm": 3.258723923845216, - "learning_rate": 8.361042625171586e-06, - "loss": 0.763, - "step": 885 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 2.607080191608247, - "learning_rate": 8.34061694538786e-06, - "loss": 0.7203, - "step": 886 - }, - { - "epoch": 1.1342710997442456, - "grad_norm": 2.9045819305104406, - "learning_rate": 8.32019838224548e-06, - "loss": 0.6841, - "step": 887 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 2.5747202637869626, - "learning_rate": 8.29978702331409e-06, - "loss": 0.6955, - "step": 888 - }, - { - "epoch": 1.1368286445012787, - "grad_norm": 2.8747596884829125, - "learning_rate": 8.279382956132453e-06, - "loss": 0.7661, - "step": 889 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 1.9591632163862727, - "learning_rate": 8.258986268208033e-06, - "loss": 0.4277, - "step": 890 - }, - { - "epoch": 1.1393861892583121, - "grad_norm": 3.0633074399600058, - "learning_rate": 8.238597047016672e-06, - "loss": 0.7448, - "step": 891 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 3.2606913040964627, - "learning_rate": 8.218215380002178e-06, - "loss": 0.7049, - "step": 892 - }, - { - "epoch": 1.1419437340153453, - "grad_norm": 1.7673022420501587, - "learning_rate": 8.197841354575959e-06, - "loss": 0.3926, - "step": 893 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 3.1053974947855685, - "learning_rate": 8.177475058116653e-06, - "loss": 0.7393, - "step": 894 - }, - { - "epoch": 1.1445012787723785, - "grad_norm": 1.8141943295092446, - "learning_rate": 8.15711657796975e-06, - "loss": 0.3948, - "step": 895 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 3.217078960656602, - "learning_rate": 8.136766001447229e-06, - "loss": 0.7486, - "step": 896 - }, - { - "epoch": 1.1470588235294117, - "grad_norm": 3.1725636895748397, - "learning_rate": 8.116423415827148e-06, - "loss": 0.7563, - "step": 897 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 2.915038066715857, - "learning_rate": 8.096088908353316e-06, - "loss": 0.6477, - "step": 898 - }, - { - "epoch": 1.149616368286445, - "grad_norm": 3.4977714444063186, - "learning_rate": 8.075762566234892e-06, - "loss": 0.8317, - "step": 899 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 1.8363331466925388, - "learning_rate": 8.055444476646007e-06, - "loss": 0.4698, - "step": 900 - }, - { - "epoch": 1.1521739130434783, - "grad_norm": 3.0767464429475613, - "learning_rate": 8.035134726725407e-06, - "loss": 0.8061, - "step": 901 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 2.981382787861935, - "learning_rate": 8.014833403576076e-06, - "loss": 0.7407, - "step": 902 - }, - { - "epoch": 1.1547314578005115, - "grad_norm": 2.9600621593778453, - "learning_rate": 7.994540594264849e-06, - "loss": 0.7658, - "step": 903 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 2.9666003674525423, - "learning_rate": 7.974256385822044e-06, - "loss": 0.8129, - "step": 904 - }, - { - "epoch": 1.1572890025575449, - "grad_norm": 1.5961050633246143, - "learning_rate": 7.95398086524111e-06, - "loss": 0.4142, - "step": 905 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 2.965197829893621, - "learning_rate": 7.933714119478219e-06, - "loss": 0.7706, - "step": 906 - }, - { - "epoch": 1.159846547314578, - "grad_norm": 3.153014296687628, - "learning_rate": 7.913456235451911e-06, - "loss": 0.7819, - "step": 907 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 3.100541517600197, - "learning_rate": 7.89320730004274e-06, - "loss": 0.7136, - "step": 908 - }, - { - "epoch": 1.1624040920716112, - "grad_norm": 1.5658310289996948, - "learning_rate": 7.87296740009285e-06, - "loss": 0.3976, - "step": 909 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 2.7762972366330025, - "learning_rate": 7.852736622405663e-06, - "loss": 0.7434, - "step": 910 - }, - { - "epoch": 1.1649616368286444, - "grad_norm": 2.6832764316980806, - "learning_rate": 7.832515053745466e-06, - "loss": 0.7631, - "step": 911 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 3.021455398316102, - "learning_rate": 7.812302780837045e-06, - "loss": 0.7489, - "step": 912 - }, - { - "epoch": 1.1675191815856778, - "grad_norm": 3.0753120701997805, - "learning_rate": 7.792099890365333e-06, - "loss": 0.7428, - "step": 913 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 1.8959038135574482, - "learning_rate": 7.771906468975016e-06, - "loss": 0.4276, - "step": 914 - }, - { - "epoch": 1.170076726342711, - "grad_norm": 3.1033145336204577, - "learning_rate": 7.751722603270166e-06, - "loss": 0.7755, - "step": 915 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 3.0920545518115006, - "learning_rate": 7.731548379813885e-06, - "loss": 0.7721, - "step": 916 - }, - { - "epoch": 1.1726342710997442, - "grad_norm": 2.6992729847005883, - "learning_rate": 7.71138388512791e-06, - "loss": 0.6933, - "step": 917 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 2.9173255656868764, - "learning_rate": 7.69122920569226e-06, - "loss": 0.7524, - "step": 918 - }, - { - "epoch": 1.1751918158567776, - "grad_norm": 2.9581546964454577, - "learning_rate": 7.67108442794486e-06, - "loss": 0.7749, - "step": 919 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 3.043385085371664, - "learning_rate": 7.650949638281168e-06, - "loss": 0.7914, - "step": 920 - }, - { - "epoch": 1.1777493606138107, - "grad_norm": 2.879424860193095, - "learning_rate": 7.6308249230538e-06, - "loss": 0.7763, - "step": 921 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 2.7769348262244145, - "learning_rate": 7.610710368572177e-06, - "loss": 0.7808, - "step": 922 - }, - { - "epoch": 1.180306905370844, - "grad_norm": 2.773594393188725, - "learning_rate": 7.5906060611021374e-06, - "loss": 0.7155, - "step": 923 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 1.8039887400599837, - "learning_rate": 7.570512086865566e-06, - "loss": 0.4017, - "step": 924 - }, - { - "epoch": 1.1828644501278773, - "grad_norm": 2.7671105698620115, - "learning_rate": 7.550428532040044e-06, - "loss": 0.7377, - "step": 925 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 2.6799781332685817, - "learning_rate": 7.53035548275846e-06, - "loss": 0.7279, - "step": 926 - }, - { - "epoch": 1.1854219948849105, - "grad_norm": 1.8208624308045198, - "learning_rate": 7.510293025108643e-06, - "loss": 0.3835, - "step": 927 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 3.021642225091634, - "learning_rate": 7.490241245133007e-06, - "loss": 0.719, - "step": 928 - }, - { - "epoch": 1.1879795396419437, - "grad_norm": 3.0833556495603536, - "learning_rate": 7.470200228828168e-06, - "loss": 0.7866, - "step": 929 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 2.0639995640493094, - "learning_rate": 7.450170062144576e-06, - "loss": 0.4618, - "step": 930 - }, - { - "epoch": 1.190537084398977, - "grad_norm": 2.846990262438077, - "learning_rate": 7.4301508309861515e-06, - "loss": 0.7199, - "step": 931 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 3.050385788980295, - "learning_rate": 7.410142621209923e-06, - "loss": 0.7489, - "step": 932 - }, - { - "epoch": 1.1930946291560103, - "grad_norm": 2.63200589787841, - "learning_rate": 7.390145518625639e-06, - "loss": 0.6895, - "step": 933 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 3.0001101896664912, - "learning_rate": 7.37015960899542e-06, - "loss": 0.7327, - "step": 934 - }, - { - "epoch": 1.1956521739130435, - "grad_norm": 2.96844232251041, - "learning_rate": 7.350184978033386e-06, - "loss": 0.6871, - "step": 935 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 2.9028788616319305, - "learning_rate": 7.330221711405274e-06, - "loss": 0.8159, - "step": 936 - }, - { - "epoch": 1.1982097186700766, - "grad_norm": 3.0164607866653412, - "learning_rate": 7.310269894728095e-06, - "loss": 0.6883, - "step": 937 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 3.1290940675965415, - "learning_rate": 7.290329613569751e-06, - "loss": 0.6965, - "step": 938 - }, - { - "epoch": 1.20076726342711, - "grad_norm": 3.0595239409506556, - "learning_rate": 7.2704009534486635e-06, - "loss": 0.7595, - "step": 939 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 3.1236378659469772, - "learning_rate": 7.250483999833422e-06, - "loss": 0.6863, - "step": 940 - }, - { - "epoch": 1.2033248081841432, - "grad_norm": 2.886225912922704, - "learning_rate": 7.230578838142413e-06, - "loss": 0.7322, - "step": 941 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 2.976348664095105, - "learning_rate": 7.2106855537434415e-06, - "loss": 0.7454, - "step": 942 - }, - { - "epoch": 1.2058823529411764, - "grad_norm": 1.7844382742043579, - "learning_rate": 7.1908042319533775e-06, - "loss": 0.4036, - "step": 943 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 3.054555377829083, - "learning_rate": 7.170934958037794e-06, - "loss": 0.7234, - "step": 944 - }, - { - "epoch": 1.2084398976982098, - "grad_norm": 2.7844904572410623, - "learning_rate": 7.151077817210583e-06, - "loss": 0.7504, - "step": 945 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 3.20442102376037, - "learning_rate": 7.131232894633605e-06, - "loss": 0.7443, - "step": 946 - }, - { - "epoch": 1.210997442455243, - "grad_norm": 2.9209630638195243, - "learning_rate": 7.111400275416328e-06, - "loss": 0.7192, - "step": 947 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 2.921728586486058, - "learning_rate": 7.091580044615434e-06, - "loss": 0.722, - "step": 948 - }, - { - "epoch": 1.2135549872122762, - "grad_norm": 2.910890280655321, - "learning_rate": 7.071772287234497e-06, - "loss": 0.712, - "step": 949 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 2.9987563929615844, - "learning_rate": 7.051977088223585e-06, - "loss": 0.7513, - "step": 950 - }, - { - "epoch": 1.2161125319693094, - "grad_norm": 2.9362035359244407, - "learning_rate": 7.032194532478902e-06, - "loss": 0.7557, - "step": 951 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 2.932509910366061, - "learning_rate": 7.012424704842441e-06, - "loss": 0.7219, - "step": 952 - }, - { - "epoch": 1.2186700767263428, - "grad_norm": 1.7208858137651528, - "learning_rate": 6.9926676901015985e-06, - "loss": 0.4021, - "step": 953 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 3.049245370831198, - "learning_rate": 6.972923572988819e-06, - "loss": 0.7714, - "step": 954 - }, - { - "epoch": 1.221227621483376, - "grad_norm": 3.2041934147197364, - "learning_rate": 6.9531924381812384e-06, - "loss": 0.7843, - "step": 955 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 1.7111016410102544, - "learning_rate": 6.933474370300316e-06, - "loss": 0.4547, - "step": 956 - }, - { - "epoch": 1.2237851662404091, - "grad_norm": 1.697225046848492, - "learning_rate": 6.913769453911459e-06, - "loss": 0.4239, - "step": 957 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 2.7944286750833154, - "learning_rate": 6.894077773523686e-06, - "loss": 0.6964, - "step": 958 - }, - { - "epoch": 1.2263427109974425, - "grad_norm": 3.1405403335281514, - "learning_rate": 6.874399413589245e-06, - "loss": 0.7485, - "step": 959 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 3.1974976671990056, - "learning_rate": 6.854734458503246e-06, - "loss": 0.7978, - "step": 960 - }, - { - "epoch": 1.2289002557544757, - "grad_norm": 2.8433785723731995, - "learning_rate": 6.835082992603326e-06, - "loss": 0.7318, - "step": 961 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 3.060271050126063, - "learning_rate": 6.815445100169262e-06, - "loss": 0.8046, - "step": 962 - }, - { - "epoch": 1.2314578005115089, - "grad_norm": 1.6521683378176295, - "learning_rate": 6.795820865422611e-06, - "loss": 0.435, - "step": 963 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 2.970610716051806, - "learning_rate": 6.776210372526373e-06, - "loss": 0.769, - "step": 964 - }, - { - "epoch": 1.2340153452685423, - "grad_norm": 2.835015624698343, - "learning_rate": 6.756613705584602e-06, - "loss": 0.7289, - "step": 965 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 1.729624246282208, - "learning_rate": 6.737030948642052e-06, - "loss": 0.3731, - "step": 966 - }, - { - "epoch": 1.2365728900255755, - "grad_norm": 1.6548846903268892, - "learning_rate": 6.717462185683829e-06, - "loss": 0.3838, - "step": 967 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 2.934533149055867, - "learning_rate": 6.697907500635024e-06, - "loss": 0.7604, - "step": 968 - }, - { - "epoch": 1.2391304347826086, - "grad_norm": 2.9159366529957307, - "learning_rate": 6.678366977360344e-06, - "loss": 0.7636, - "step": 969 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 2.8143591112131494, - "learning_rate": 6.658840699663765e-06, - "loss": 0.7619, - "step": 970 - }, - { - "epoch": 1.241687979539642, - "grad_norm": 2.8404444119040906, - "learning_rate": 6.639328751288167e-06, - "loss": 0.7327, - "step": 971 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 2.8360230040508605, - "learning_rate": 6.619831215914974e-06, - "loss": 0.7641, - "step": 972 - }, - { - "epoch": 1.2442455242966752, - "grad_norm": 1.916259649092161, - "learning_rate": 6.600348177163797e-06, - "loss": 0.4477, - "step": 973 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 2.97713850131871, - "learning_rate": 6.580879718592079e-06, - "loss": 0.7384, - "step": 974 - }, - { - "epoch": 1.2468030690537084, - "grad_norm": 2.9688289283410034, - "learning_rate": 6.561425923694725e-06, - "loss": 0.7385, - "step": 975 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 2.9309658151316964, - "learning_rate": 6.5419868759037555e-06, - "loss": 0.6859, - "step": 976 - }, - { - "epoch": 1.2493606138107416, - "grad_norm": 3.1279600272384895, - "learning_rate": 6.52256265858795e-06, - "loss": 0.8212, - "step": 977 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 3.0445466302472246, - "learning_rate": 6.503153355052471e-06, - "loss": 0.7171, - "step": 978 - }, - { - "epoch": 1.251918158567775, - "grad_norm": 3.044744620891503, - "learning_rate": 6.483759048538533e-06, - "loss": 0.7779, - "step": 979 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 3.1756437972796263, - "learning_rate": 6.464379822223028e-06, - "loss": 0.7881, - "step": 980 - }, - { - "epoch": 1.2544757033248082, - "grad_norm": 2.937678200672231, - "learning_rate": 6.44501575921817e-06, - "loss": 0.7519, - "step": 981 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 3.1584004261259353, - "learning_rate": 6.425666942571141e-06, - "loss": 0.8313, - "step": 982 - }, - { - "epoch": 1.2570332480818414, - "grad_norm": 3.017053686794488, - "learning_rate": 6.4063334552637465e-06, - "loss": 0.7698, - "step": 983 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 2.811665324472737, - "learning_rate": 6.38701538021203e-06, - "loss": 0.721, - "step": 984 - }, - { - "epoch": 1.2595907928388748, - "grad_norm": 1.9444163583678316, - "learning_rate": 6.367712800265955e-06, - "loss": 0.4471, - "step": 985 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 1.9385263238957737, - "learning_rate": 6.348425798209017e-06, - "loss": 0.4264, - "step": 986 - }, - { - "epoch": 1.262148337595908, - "grad_norm": 1.7916691774532614, - "learning_rate": 6.329154456757914e-06, - "loss": 0.3914, - "step": 987 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 2.912528418046316, - "learning_rate": 6.309898858562169e-06, - "loss": 0.7342, - "step": 988 - }, - { - "epoch": 1.2647058823529411, - "grad_norm": 1.6325981069990103, - "learning_rate": 6.2906590862037874e-06, - "loss": 0.3773, - "step": 989 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 2.958247604624272, - "learning_rate": 6.2714352221969155e-06, - "loss": 0.7757, - "step": 990 - }, - { - "epoch": 1.2672634271099743, - "grad_norm": 3.0198701894960593, - "learning_rate": 6.252227348987454e-06, - "loss": 0.7776, - "step": 991 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 2.968000085913659, - "learning_rate": 6.233035548952734e-06, - "loss": 0.7719, - "step": 992 - }, - { - "epoch": 1.2698209718670077, - "grad_norm": 2.9890259908262973, - "learning_rate": 6.213859904401156e-06, - "loss": 0.7821, - "step": 993 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 3.1828836967507956, - "learning_rate": 6.194700497571826e-06, - "loss": 0.7433, - "step": 994 - }, - { - "epoch": 1.272378516624041, - "grad_norm": 2.9033063921662046, - "learning_rate": 6.175557410634212e-06, - "loss": 0.7767, - "step": 995 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 3.0802193654554255, - "learning_rate": 6.1564307256878005e-06, - "loss": 0.7788, - "step": 996 - }, - { - "epoch": 1.2749360613810743, - "grad_norm": 2.899399685152301, - "learning_rate": 6.137320524761721e-06, - "loss": 0.7331, - "step": 997 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 2.930996098188369, - "learning_rate": 6.118226889814409e-06, - "loss": 0.778, - "step": 998 - }, - { - "epoch": 1.2774936061381075, - "grad_norm": 2.885373625092615, - "learning_rate": 6.099149902733269e-06, - "loss": 0.7417, - "step": 999 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 3.141063615717511, - "learning_rate": 6.080089645334286e-06, - "loss": 0.7733, - "step": 1000 - }, - { - "epoch": 1.2800511508951407, - "grad_norm": 3.006876043918675, - "learning_rate": 6.061046199361706e-06, - "loss": 0.745, - "step": 1001 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 2.927517531992137, - "learning_rate": 6.042019646487685e-06, - "loss": 0.7404, - "step": 1002 - }, - { - "epoch": 1.2826086956521738, - "grad_norm": 3.0342662968978518, - "learning_rate": 6.023010068311905e-06, - "loss": 0.7917, - "step": 1003 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 2.8428240955153417, - "learning_rate": 6.004017546361272e-06, - "loss": 0.7431, - "step": 1004 - }, - { - "epoch": 1.2851662404092072, - "grad_norm": 2.9390998225525644, - "learning_rate": 5.985042162089529e-06, - "loss": 0.7518, - "step": 1005 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 3.1165449846252917, - "learning_rate": 5.966083996876922e-06, - "loss": 0.7258, - "step": 1006 - }, - { - "epoch": 1.2877237851662404, - "grad_norm": 2.7529744338987014, - "learning_rate": 5.947143132029853e-06, - "loss": 0.7346, - "step": 1007 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 2.920066563506553, - "learning_rate": 5.9282196487805285e-06, - "loss": 0.7595, - "step": 1008 - }, - { - "epoch": 1.2902813299232736, - "grad_norm": 2.8120917303816793, - "learning_rate": 5.9093136282866014e-06, - "loss": 0.7006, - "step": 1009 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 2.826114382123849, - "learning_rate": 5.890425151630841e-06, - "loss": 0.6731, - "step": 1010 - }, - { - "epoch": 1.292838874680307, - "grad_norm": 2.396492029331963, - "learning_rate": 5.871554299820774e-06, - "loss": 0.4295, - "step": 1011 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 2.965649599552891, - "learning_rate": 5.8527011537883295e-06, - "loss": 0.7764, - "step": 1012 - }, - { - "epoch": 1.2953964194373402, - "grad_norm": 3.0706958803395876, - "learning_rate": 5.833865794389515e-06, - "loss": 0.7847, - "step": 1013 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 2.968422819291162, - "learning_rate": 5.8150483024040494e-06, - "loss": 0.738, - "step": 1014 - }, - { - "epoch": 1.2979539641943734, - "grad_norm": 2.806002280937191, - "learning_rate": 5.796248758535021e-06, - "loss": 0.7226, - "step": 1015 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 2.9909600598564174, - "learning_rate": 5.77746724340855e-06, - "loss": 0.7593, - "step": 1016 - }, - { - "epoch": 1.3005115089514065, - "grad_norm": 2.8898628968345554, - "learning_rate": 5.7587038375734285e-06, - "loss": 0.6962, - "step": 1017 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 2.7792715788513616, - "learning_rate": 5.739958621500788e-06, - "loss": 0.7499, - "step": 1018 - }, - { - "epoch": 1.30306905370844, - "grad_norm": 2.665083851193987, - "learning_rate": 5.721231675583748e-06, - "loss": 0.69, - "step": 1019 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 2.935785351788553, - "learning_rate": 5.702523080137073e-06, - "loss": 0.7061, - "step": 1020 - }, - { - "epoch": 1.3056265984654731, - "grad_norm": 2.892466888056765, - "learning_rate": 5.683832915396823e-06, - "loss": 0.7044, - "step": 1021 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 3.1289570194307355, - "learning_rate": 5.665161261520021e-06, - "loss": 0.7112, - "step": 1022 - }, - { - "epoch": 1.3081841432225063, - "grad_norm": 2.7629222518601075, - "learning_rate": 5.6465081985843e-06, - "loss": 0.7075, - "step": 1023 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 2.703856455426406, - "learning_rate": 5.627873806587549e-06, - "loss": 0.7168, - "step": 1024 - }, - { - "epoch": 1.3107416879795397, - "grad_norm": 1.9843402546424085, - "learning_rate": 5.609258165447602e-06, - "loss": 0.3993, - "step": 1025 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 1.9641184833702061, - "learning_rate": 5.59066135500187e-06, - "loss": 0.4099, - "step": 1026 - }, - { - "epoch": 1.313299232736573, - "grad_norm": 3.1174766139043846, - "learning_rate": 5.572083455006986e-06, - "loss": 0.7373, - "step": 1027 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 3.0462807242699332, - "learning_rate": 5.55352454513851e-06, - "loss": 0.7003, - "step": 1028 - }, - { - "epoch": 1.315856777493606, - "grad_norm": 3.0095203175197356, - "learning_rate": 5.534984704990545e-06, - "loss": 0.7084, - "step": 1029 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 2.963968804866256, - "learning_rate": 5.516464014075396e-06, - "loss": 0.7387, - "step": 1030 - }, - { - "epoch": 1.3184143222506393, - "grad_norm": 3.0882090370488813, - "learning_rate": 5.497962551823266e-06, - "loss": 0.7658, - "step": 1031 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 1.636422726268331, - "learning_rate": 5.479480397581884e-06, - "loss": 0.3826, - "step": 1032 - }, - { - "epoch": 1.3209718670076727, - "grad_norm": 2.995467304089546, - "learning_rate": 5.461017630616154e-06, - "loss": 0.7226, - "step": 1033 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 1.6366103121026268, - "learning_rate": 5.44257433010786e-06, - "loss": 0.3899, - "step": 1034 - }, - { - "epoch": 1.3235294117647058, - "grad_norm": 3.0608923433546567, - "learning_rate": 5.4241505751552896e-06, - "loss": 0.8245, - "step": 1035 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 2.9044260666356005, - "learning_rate": 5.405746444772888e-06, - "loss": 0.7866, - "step": 1036 - }, - { - "epoch": 1.3260869565217392, - "grad_norm": 3.0558151020829145, - "learning_rate": 5.387362017890967e-06, - "loss": 0.7439, - "step": 1037 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 3.0391685449486703, - "learning_rate": 5.368997373355316e-06, - "loss": 0.7469, - "step": 1038 - }, - { - "epoch": 1.3286445012787724, - "grad_norm": 1.6402939194771868, - "learning_rate": 5.350652589926874e-06, - "loss": 0.4108, - "step": 1039 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 3.108923220255234, - "learning_rate": 5.3323277462814295e-06, - "loss": 0.7309, - "step": 1040 - }, - { - "epoch": 1.3312020460358056, - "grad_norm": 3.040351059195762, - "learning_rate": 5.314022921009237e-06, - "loss": 0.7167, - "step": 1041 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 1.774931361368431, - "learning_rate": 5.295738192614691e-06, - "loss": 0.4166, - "step": 1042 - }, - { - "epoch": 1.3337595907928388, - "grad_norm": 2.9458008858292817, - "learning_rate": 5.277473639516006e-06, - "loss": 0.7561, - "step": 1043 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 3.2750750919121914, - "learning_rate": 5.2592293400448806e-06, - "loss": 0.7677, - "step": 1044 - }, - { - "epoch": 1.3363171355498722, - "grad_norm": 2.871995348365531, - "learning_rate": 5.241005372446126e-06, - "loss": 0.6983, - "step": 1045 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 3.1126504194585065, - "learning_rate": 5.22280181487737e-06, - "loss": 0.714, - "step": 1046 - }, - { - "epoch": 1.3388746803069054, - "grad_norm": 3.015552376662682, - "learning_rate": 5.204618745408719e-06, - "loss": 0.7269, - "step": 1047 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 1.689664757924787, - "learning_rate": 5.186456242022384e-06, - "loss": 0.3934, - "step": 1048 - }, - { - "epoch": 1.3414322250639386, - "grad_norm": 2.720559163809894, - "learning_rate": 5.168314382612391e-06, - "loss": 0.6424, - "step": 1049 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 2.921913883733814, - "learning_rate": 5.150193244984239e-06, - "loss": 0.7493, - "step": 1050 - }, - { - "epoch": 1.343989769820972, - "grad_norm": 1.6008907977172195, - "learning_rate": 5.1320929068545324e-06, - "loss": 0.3982, - "step": 1051 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 3.0037648330194857, - "learning_rate": 5.114013445850684e-06, - "loss": 0.7646, - "step": 1052 - }, - { - "epoch": 1.3465473145780051, - "grad_norm": 1.7940173547959377, - "learning_rate": 5.095954939510583e-06, - "loss": 0.4237, - "step": 1053 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 3.0016919093537986, - "learning_rate": 5.077917465282228e-06, - "loss": 0.6971, - "step": 1054 - }, - { - "epoch": 1.3491048593350383, - "grad_norm": 2.79143297903951, - "learning_rate": 5.0599011005234255e-06, - "loss": 0.6993, - "step": 1055 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 3.0150765522474723, - "learning_rate": 5.04190592250146e-06, - "loss": 0.7343, - "step": 1056 - }, - { - "epoch": 1.3516624040920715, - "grad_norm": 2.9519224170419878, - "learning_rate": 5.023932008392733e-06, - "loss": 0.7938, - "step": 1057 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 3.0300741349079057, - "learning_rate": 5.00597943528246e-06, - "loss": 0.7635, - "step": 1058 - }, - { - "epoch": 1.354219948849105, - "grad_norm": 2.9375471149396155, - "learning_rate": 4.988048280164339e-06, - "loss": 0.764, - "step": 1059 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 1.685727873290131, - "learning_rate": 4.970138619940197e-06, - "loss": 0.3885, - "step": 1060 - }, - { - "epoch": 1.356777493606138, - "grad_norm": 2.756707415228499, - "learning_rate": 4.952250531419682e-06, - "loss": 0.7753, - "step": 1061 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 3.0361267842792827, - "learning_rate": 4.934384091319929e-06, - "loss": 0.6907, - "step": 1062 - }, - { - "epoch": 1.3593350383631715, - "grad_norm": 3.112956419983879, - "learning_rate": 4.9165393762652255e-06, - "loss": 0.763, - "step": 1063 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 2.933066191039036, - "learning_rate": 4.898716462786689e-06, - "loss": 0.7809, - "step": 1064 - }, - { - "epoch": 1.3618925831202047, - "grad_norm": 2.935152256688672, - "learning_rate": 4.880915427321933e-06, - "loss": 0.7966, - "step": 1065 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 2.939498479921398, - "learning_rate": 4.8631363462147444e-06, - "loss": 0.7301, - "step": 1066 - }, - { - "epoch": 1.3644501278772379, - "grad_norm": 2.887105602494912, - "learning_rate": 4.845379295714752e-06, - "loss": 0.7416, - "step": 1067 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 2.9532676562469113, - "learning_rate": 4.827644351977103e-06, - "loss": 0.6951, - "step": 1068 - }, - { - "epoch": 1.367007672634271, - "grad_norm": 2.773098911723423, - "learning_rate": 4.809931591062136e-06, - "loss": 0.7149, - "step": 1069 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 2.800778463513303, - "learning_rate": 4.7922410889350494e-06, - "loss": 0.7354, - "step": 1070 - }, - { - "epoch": 1.3695652173913042, - "grad_norm": 2.9597659568606813, - "learning_rate": 4.774572921465581e-06, - "loss": 0.7389, - "step": 1071 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 1.5864075740603552, - "learning_rate": 4.756927164427685e-06, - "loss": 0.3858, - "step": 1072 - }, - { - "epoch": 1.3721227621483376, - "grad_norm": 1.9997754028155046, - "learning_rate": 4.7393038934992e-06, - "loss": 0.456, - "step": 1073 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 3.061139611683033, - "learning_rate": 4.721703184261522e-06, - "loss": 0.7295, - "step": 1074 - }, - { - "epoch": 1.3746803069053708, - "grad_norm": 3.159638701414426, - "learning_rate": 4.704125112199308e-06, - "loss": 0.7074, - "step": 1075 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 2.9289264217942432, - "learning_rate": 4.686569752700101e-06, - "loss": 0.6829, - "step": 1076 - }, - { - "epoch": 1.3772378516624042, - "grad_norm": 3.077441201267646, - "learning_rate": 4.6690371810540515e-06, - "loss": 0.7599, - "step": 1077 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 2.917917073757673, - "learning_rate": 4.651527472453586e-06, - "loss": 0.7254, - "step": 1078 - }, - { - "epoch": 1.3797953964194374, - "grad_norm": 3.052198703170526, - "learning_rate": 4.634040701993061e-06, - "loss": 0.7388, - "step": 1079 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 3.0893637750613063, - "learning_rate": 4.616576944668468e-06, - "loss": 0.7223, - "step": 1080 - }, - { - "epoch": 1.3823529411764706, - "grad_norm": 3.1723635480113472, - "learning_rate": 4.599136275377098e-06, - "loss": 0.7587, - "step": 1081 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 3.0072341265914893, - "learning_rate": 4.581718768917228e-06, - "loss": 0.7064, - "step": 1082 - }, - { - "epoch": 1.3849104859335037, - "grad_norm": 3.1501927083728023, - "learning_rate": 4.56432449998779e-06, - "loss": 0.7141, - "step": 1083 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 3.356369774969209, - "learning_rate": 4.54695354318806e-06, - "loss": 0.7773, - "step": 1084 - }, - { - "epoch": 1.3874680306905371, - "grad_norm": 3.0061530628351796, - "learning_rate": 4.529605973017335e-06, - "loss": 0.8019, - "step": 1085 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 2.9591763813008165, - "learning_rate": 4.5122818638746114e-06, - "loss": 0.6979, - "step": 1086 - }, - { - "epoch": 1.3900255754475703, - "grad_norm": 2.970418927510644, - "learning_rate": 4.494981290058268e-06, - "loss": 0.715, - "step": 1087 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 2.908538677759447, - "learning_rate": 4.477704325765748e-06, - "loss": 0.7099, - "step": 1088 - }, - { - "epoch": 1.3925831202046035, - "grad_norm": 2.952070115291308, - "learning_rate": 4.460451045093239e-06, - "loss": 0.7586, - "step": 1089 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 2.9029224145297077, - "learning_rate": 4.443221522035357e-06, - "loss": 0.7275, - "step": 1090 - }, - { - "epoch": 1.395140664961637, - "grad_norm": 3.0938731136794515, - "learning_rate": 4.426015830484825e-06, - "loss": 0.7539, - "step": 1091 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 2.8926337760206406, - "learning_rate": 4.408834044232164e-06, - "loss": 0.7191, - "step": 1092 - }, - { - "epoch": 1.39769820971867, - "grad_norm": 1.5374449159948451, - "learning_rate": 4.391676236965369e-06, - "loss": 0.3842, - "step": 1093 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 2.8974599696504026, - "learning_rate": 4.374542482269593e-06, - "loss": 0.7322, - "step": 1094 - }, - { - "epoch": 1.4002557544757033, - "grad_norm": 2.694335358239732, - "learning_rate": 4.35743285362684e-06, - "loss": 0.745, - "step": 1095 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 2.970705852718078, - "learning_rate": 4.340347424415639e-06, - "loss": 0.7237, - "step": 1096 - }, - { - "epoch": 1.4028132992327365, - "grad_norm": 2.737596061440401, - "learning_rate": 4.323286267910736e-06, - "loss": 0.6908, - "step": 1097 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 3.3309316075264084, - "learning_rate": 4.306249457282778e-06, - "loss": 0.7762, - "step": 1098 - }, - { - "epoch": 1.4053708439897699, - "grad_norm": 1.7037835244562842, - "learning_rate": 4.289237065598001e-06, - "loss": 0.3939, - "step": 1099 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 2.8626697146009703, - "learning_rate": 4.272249165817912e-06, - "loss": 0.6828, - "step": 1100 - }, - { - "epoch": 1.407928388746803, - "grad_norm": 3.1324732228534082, - "learning_rate": 4.2552858307989784e-06, - "loss": 0.7332, - "step": 1101 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 3.0509087366882, - "learning_rate": 4.238347133292321e-06, - "loss": 0.73, - "step": 1102 - }, - { - "epoch": 1.4104859335038364, - "grad_norm": 3.3036296036821504, - "learning_rate": 4.22143314594339e-06, - "loss": 0.742, - "step": 1103 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 2.9235180322677006, - "learning_rate": 4.204543941291666e-06, - "loss": 0.7191, - "step": 1104 - }, - { - "epoch": 1.4130434782608696, - "grad_norm": 1.7790358844202017, - "learning_rate": 4.187679591770341e-06, - "loss": 0.3796, - "step": 1105 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 3.022787670918728, - "learning_rate": 4.170840169706011e-06, - "loss": 0.7539, - "step": 1106 - }, - { - "epoch": 1.4156010230179028, - "grad_norm": 2.890055468686939, - "learning_rate": 4.154025747318363e-06, - "loss": 0.7136, - "step": 1107 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 1.5868313151137217, - "learning_rate": 4.137236396719871e-06, - "loss": 0.3823, - "step": 1108 - }, - { - "epoch": 1.418158567774936, - "grad_norm": 2.836760914898176, - "learning_rate": 4.120472189915479e-06, - "loss": 0.67, - "step": 1109 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 3.1144096635510565, - "learning_rate": 4.1037331988022975e-06, - "loss": 0.8135, - "step": 1110 - }, - { - "epoch": 1.4207161125319694, - "grad_norm": 2.96068182030603, - "learning_rate": 4.087019495169296e-06, - "loss": 0.7316, - "step": 1111 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 2.8094126580744287, - "learning_rate": 4.070331150696988e-06, - "loss": 0.725, - "step": 1112 - }, - { - "epoch": 1.4232736572890026, - "grad_norm": 1.8801567791527827, - "learning_rate": 4.053668236957135e-06, - "loss": 0.419, - "step": 1113 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 3.1046087422394697, - "learning_rate": 4.037030825412429e-06, - "loss": 0.7087, - "step": 1114 - }, - { - "epoch": 1.4258312020460358, - "grad_norm": 2.987132343317035, - "learning_rate": 4.020418987416183e-06, - "loss": 0.7219, - "step": 1115 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 1.7477546489896265, - "learning_rate": 4.003832794212048e-06, - "loss": 0.3799, - "step": 1116 - }, - { - "epoch": 1.4283887468030692, - "grad_norm": 2.996616238025797, - "learning_rate": 3.987272316933686e-06, - "loss": 0.681, - "step": 1117 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 2.9137397364404998, - "learning_rate": 3.970737626604453e-06, - "loss": 0.7474, - "step": 1118 - }, - { - "epoch": 1.4309462915601023, - "grad_norm": 2.8947725848671024, - "learning_rate": 3.954228794137138e-06, - "loss": 0.6744, - "step": 1119 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 2.995308740531203, - "learning_rate": 3.937745890333623e-06, - "loss": 0.732, - "step": 1120 - }, - { - "epoch": 1.4335038363171355, - "grad_norm": 3.1048365871357526, - "learning_rate": 3.9212889858845745e-06, - "loss": 0.7433, - "step": 1121 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 2.8841938813192565, - "learning_rate": 3.904858151369178e-06, - "loss": 0.6743, - "step": 1122 - }, - { - "epoch": 1.4360613810741687, - "grad_norm": 1.781088299607186, - "learning_rate": 3.888453457254801e-06, - "loss": 0.4263, - "step": 1123 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 3.116068037256206, - "learning_rate": 3.872074973896693e-06, - "loss": 0.7325, - "step": 1124 - }, - { - "epoch": 1.438618925831202, - "grad_norm": 1.6085453167987824, - "learning_rate": 3.855722771537711e-06, - "loss": 0.382, - "step": 1125 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 3.1171927440228826, - "learning_rate": 3.839396920307993e-06, - "loss": 0.7085, - "step": 1126 - }, - { - "epoch": 1.4411764705882353, - "grad_norm": 3.0980009115635943, - "learning_rate": 3.823097490224651e-06, - "loss": 0.7512, - "step": 1127 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 3.027287912218153, - "learning_rate": 3.8068245511915048e-06, - "loss": 0.7401, - "step": 1128 - }, - { - "epoch": 1.4437340153452685, - "grad_norm": 3.0405603152044227, - "learning_rate": 3.7905781729987533e-06, - "loss": 0.698, - "step": 1129 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 2.994842124413095, - "learning_rate": 3.7743584253226697e-06, - "loss": 0.8046, - "step": 1130 - }, - { - "epoch": 1.4462915601023019, - "grad_norm": 3.051303472832137, - "learning_rate": 3.758165377725338e-06, - "loss": 0.7135, - "step": 1131 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 2.7335583151104026, - "learning_rate": 3.741999099654324e-06, - "loss": 0.7497, - "step": 1132 - }, - { - "epoch": 1.448849104859335, - "grad_norm": 1.6132367854915686, - "learning_rate": 3.7258596604423756e-06, - "loss": 0.368, - "step": 1133 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 2.76477927458852, - "learning_rate": 3.7097471293071553e-06, - "loss": 0.6713, - "step": 1134 - }, - { - "epoch": 1.4514066496163682, - "grad_norm": 2.9601480191455267, - "learning_rate": 3.6936615753509143e-06, - "loss": 0.7303, - "step": 1135 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 3.2681369189868263, - "learning_rate": 3.677603067560199e-06, - "loss": 0.7368, - "step": 1136 - }, - { - "epoch": 1.4539641943734014, - "grad_norm": 2.9687925284707766, - "learning_rate": 3.661571674805571e-06, - "loss": 0.7515, - "step": 1137 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 2.9721094462914, - "learning_rate": 3.6455674658413107e-06, - "loss": 0.731, - "step": 1138 - }, - { - "epoch": 1.4565217391304348, - "grad_norm": 3.1571695792437513, - "learning_rate": 3.6295905093050963e-06, - "loss": 0.7215, - "step": 1139 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 2.4630468983795075, - "learning_rate": 3.6136408737177354e-06, - "loss": 0.6078, - "step": 1140 - }, - { - "epoch": 1.459079283887468, - "grad_norm": 3.142845500326159, - "learning_rate": 3.597718627482876e-06, - "loss": 0.7718, - "step": 1141 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 2.859683770077125, - "learning_rate": 3.5818238388866788e-06, - "loss": 0.6802, - "step": 1142 - }, - { - "epoch": 1.4616368286445014, - "grad_norm": 1.6633994389774143, - "learning_rate": 3.5659565760975577e-06, - "loss": 0.389, - "step": 1143 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 2.8971217551796205, - "learning_rate": 3.550116907165886e-06, - "loss": 0.7026, - "step": 1144 - }, - { - "epoch": 1.4641943734015346, - "grad_norm": 3.0038205417451507, - "learning_rate": 3.534304900023672e-06, - "loss": 0.7374, - "step": 1145 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 2.9944935803091055, - "learning_rate": 3.518520622484303e-06, - "loss": 0.7283, - "step": 1146 - }, - { - "epoch": 1.4667519181585678, - "grad_norm": 3.0152447731827556, - "learning_rate": 3.5027641422422488e-06, - "loss": 0.7117, - "step": 1147 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 3.0184798690433903, - "learning_rate": 3.4870355268727473e-06, - "loss": 0.745, - "step": 1148 - }, - { - "epoch": 1.469309462915601, - "grad_norm": 2.8321008754769856, - "learning_rate": 3.47133484383154e-06, - "loss": 0.6939, - "step": 1149 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 3.0098522218754913, - "learning_rate": 3.4556621604545837e-06, - "loss": 0.6934, - "step": 1150 - }, - { - "epoch": 1.4718670076726343, - "grad_norm": 2.891069144055671, - "learning_rate": 3.4400175439577333e-06, - "loss": 0.7134, - "step": 1151 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 3.406670866876871, - "learning_rate": 3.424401061436482e-06, - "loss": 0.7793, - "step": 1152 - }, - { - "epoch": 1.4744245524296675, - "grad_norm": 2.9529048724067395, - "learning_rate": 3.4088127798656746e-06, - "loss": 0.7421, - "step": 1153 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 3.2555491734546274, - "learning_rate": 3.3932527660991877e-06, - "loss": 0.7194, - "step": 1154 - }, - { - "epoch": 1.4769820971867007, - "grad_norm": 1.6092122373432687, - "learning_rate": 3.377721086869681e-06, - "loss": 0.3884, - "step": 1155 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 2.70920176111324, - "learning_rate": 3.3622178087882908e-06, - "loss": 0.6874, - "step": 1156 - }, - { - "epoch": 1.479539641943734, - "grad_norm": 3.143087856805746, - "learning_rate": 3.3467429983443477e-06, - "loss": 0.7773, - "step": 1157 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 1.629353686346811, - "learning_rate": 3.331296721905095e-06, - "loss": 0.4228, - "step": 1158 - }, - { - "epoch": 1.4820971867007673, - "grad_norm": 3.063516841056697, - "learning_rate": 3.315879045715397e-06, - "loss": 0.7196, - "step": 1159 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 2.659652468827191, - "learning_rate": 3.3004900358974635e-06, - "loss": 0.6903, - "step": 1160 - }, - { - "epoch": 1.4846547314578005, - "grad_norm": 2.5924786763461185, - "learning_rate": 3.2851297584505604e-06, - "loss": 0.6644, - "step": 1161 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 2.9741320286289676, - "learning_rate": 3.2697982792507275e-06, - "loss": 0.7226, - "step": 1162 - }, - { - "epoch": 1.4872122762148337, - "grad_norm": 2.8043187515632293, - "learning_rate": 3.254495664050498e-06, - "loss": 0.7244, - "step": 1163 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 2.863372024995821, - "learning_rate": 3.239221978478615e-06, - "loss": 0.7648, - "step": 1164 - }, - { - "epoch": 1.489769820971867, - "grad_norm": 1.7239474671856871, - "learning_rate": 3.223977288039748e-06, - "loss": 0.4209, - "step": 1165 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 1.5105999957186527, - "learning_rate": 3.2087616581142246e-06, - "loss": 0.4085, - "step": 1166 - }, - { - "epoch": 1.4923273657289002, - "grad_norm": 2.7949818054395674, - "learning_rate": 3.193575153957722e-06, - "loss": 0.6967, - "step": 1167 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 2.739830570987631, - "learning_rate": 3.1784178407010157e-06, - "loss": 0.7393, - "step": 1168 - }, - { - "epoch": 1.4948849104859336, - "grad_norm": 3.153045900677, - "learning_rate": 3.1632897833496977e-06, - "loss": 0.7187, - "step": 1169 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 3.0932272208681697, - "learning_rate": 3.1481910467838694e-06, - "loss": 0.7357, - "step": 1170 - }, - { - "epoch": 1.4974424552429668, - "grad_norm": 1.8330067259801939, - "learning_rate": 3.133121695757896e-06, - "loss": 0.3876, - "step": 1171 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 1.7714740805936697, - "learning_rate": 3.1180817949001217e-06, - "loss": 0.41, - "step": 1172 - }, - { - "epoch": 1.5, - "grad_norm": 3.061287463939666, - "learning_rate": 3.1030714087125703e-06, - "loss": 0.7856, - "step": 1173 - }, - { - "epoch": 1.5012787723785166, - "grad_norm": 1.5770414100136119, - "learning_rate": 3.0880906015706967e-06, - "loss": 0.3844, - "step": 1174 - }, - { - "epoch": 1.5025575447570332, - "grad_norm": 1.60878697094977, - "learning_rate": 3.0731394377230994e-06, - "loss": 0.3628, - "step": 1175 - }, - { - "epoch": 1.50383631713555, - "grad_norm": 2.8670810286749013, - "learning_rate": 3.0582179812912393e-06, - "loss": 0.7144, - "step": 1176 - }, - { - "epoch": 1.5051150895140664, - "grad_norm": 2.7843680960636417, - "learning_rate": 3.0433262962691755e-06, - "loss": 0.7192, - "step": 1177 - }, - { - "epoch": 1.5063938618925832, - "grad_norm": 1.6541484809779587, - "learning_rate": 3.0284644465232824e-06, - "loss": 0.3989, - "step": 1178 - }, - { - "epoch": 1.5076726342710998, - "grad_norm": 2.9158728148542825, - "learning_rate": 3.0136324957919814e-06, - "loss": 0.6955, - "step": 1179 - }, - { - "epoch": 1.5089514066496164, - "grad_norm": 3.214229835106005, - "learning_rate": 2.998830507685463e-06, - "loss": 0.7798, - "step": 1180 - }, - { - "epoch": 1.510230179028133, - "grad_norm": 2.983223879015512, - "learning_rate": 2.9840585456854176e-06, - "loss": 0.7265, - "step": 1181 - }, - { - "epoch": 1.5115089514066495, - "grad_norm": 1.6954797942408013, - "learning_rate": 2.969316673144761e-06, - "loss": 0.3917, - "step": 1182 - }, - { - "epoch": 1.5127877237851663, - "grad_norm": 2.860972135139691, - "learning_rate": 2.9546049532873645e-06, - "loss": 0.6839, - "step": 1183 - }, - { - "epoch": 1.5140664961636827, - "grad_norm": 3.0081364777044883, - "learning_rate": 2.93992344920778e-06, - "loss": 0.7927, - "step": 1184 - }, - { - "epoch": 1.5153452685421995, - "grad_norm": 3.1162384411045974, - "learning_rate": 2.9252722238709776e-06, - "loss": 0.7587, - "step": 1185 - }, - { - "epoch": 1.5166240409207161, - "grad_norm": 2.8271045667884764, - "learning_rate": 2.9106513401120638e-06, - "loss": 0.7237, - "step": 1186 - }, - { - "epoch": 1.5179028132992327, - "grad_norm": 1.842860947943581, - "learning_rate": 2.896060860636024e-06, - "loss": 0.4197, - "step": 1187 - }, - { - "epoch": 1.5191815856777495, - "grad_norm": 2.9882901199921235, - "learning_rate": 2.8815008480174433e-06, - "loss": 0.7193, - "step": 1188 - }, - { - "epoch": 1.520460358056266, - "grad_norm": 2.672269083407042, - "learning_rate": 2.866971364700246e-06, - "loss": 0.6602, - "step": 1189 - }, - { - "epoch": 1.5217391304347827, - "grad_norm": 1.7709809618847312, - "learning_rate": 2.852472472997423e-06, - "loss": 0.3856, - "step": 1190 - }, - { - "epoch": 1.523017902813299, - "grad_norm": 3.1263904374989218, - "learning_rate": 2.8380042350907655e-06, - "loss": 0.7398, - "step": 1191 - }, - { - "epoch": 1.5242966751918159, - "grad_norm": 2.936225784774328, - "learning_rate": 2.823566713030601e-06, - "loss": 0.7295, - "step": 1192 - }, - { - "epoch": 1.5255754475703325, - "grad_norm": 2.645971424764363, - "learning_rate": 2.8091599687355242e-06, - "loss": 0.6746, - "step": 1193 - }, - { - "epoch": 1.526854219948849, - "grad_norm": 2.9446102995665115, - "learning_rate": 2.7947840639921308e-06, - "loss": 0.6912, - "step": 1194 - }, - { - "epoch": 1.5281329923273659, - "grad_norm": 3.0160512505955044, - "learning_rate": 2.780439060454756e-06, - "loss": 0.6786, - "step": 1195 - }, - { - "epoch": 1.5294117647058822, - "grad_norm": 1.6824225262290882, - "learning_rate": 2.766125019645208e-06, - "loss": 0.3965, - "step": 1196 - }, - { - "epoch": 1.530690537084399, - "grad_norm": 2.430648644192302, - "learning_rate": 2.7518420029525027e-06, - "loss": 0.6499, - "step": 1197 - }, - { - "epoch": 1.5319693094629157, - "grad_norm": 3.144595448511122, - "learning_rate": 2.7375900716326052e-06, - "loss": 0.7534, - "step": 1198 - }, - { - "epoch": 1.5332480818414322, - "grad_norm": 2.9925619358624944, - "learning_rate": 2.7233692868081607e-06, - "loss": 0.7353, - "step": 1199 - }, - { - "epoch": 1.5345268542199488, - "grad_norm": 3.073917162186909, - "learning_rate": 2.709179709468236e-06, - "loss": 0.7527, - "step": 1200 - }, - { - "epoch": 1.5358056265984654, - "grad_norm": 2.8473449403069147, - "learning_rate": 2.6950214004680596e-06, - "loss": 0.6921, - "step": 1201 - }, - { - "epoch": 1.5370843989769822, - "grad_norm": 2.9931782235493807, - "learning_rate": 2.6808944205287566e-06, - "loss": 0.7207, - "step": 1202 - }, - { - "epoch": 1.5383631713554986, - "grad_norm": 2.8164936776495924, - "learning_rate": 2.6667988302370907e-06, - "loss": 0.7179, - "step": 1203 - }, - { - "epoch": 1.5396419437340154, - "grad_norm": 3.249597218572636, - "learning_rate": 2.6527346900452056e-06, - "loss": 0.7102, - "step": 1204 - }, - { - "epoch": 1.540920716112532, - "grad_norm": 1.6125582158810252, - "learning_rate": 2.6387020602703616e-06, - "loss": 0.3642, - "step": 1205 - }, - { - "epoch": 1.5421994884910486, - "grad_norm": 1.5863258555938284, - "learning_rate": 2.6247010010946807e-06, - "loss": 0.3787, - "step": 1206 - }, - { - "epoch": 1.5434782608695652, - "grad_norm": 2.9682335213759914, - "learning_rate": 2.6107315725648876e-06, - "loss": 0.7266, - "step": 1207 - }, - { - "epoch": 1.5447570332480818, - "grad_norm": 2.891903933835182, - "learning_rate": 2.596793834592053e-06, - "loss": 0.7382, - "step": 1208 - }, - { - "epoch": 1.5460358056265986, - "grad_norm": 2.9591433833152316, - "learning_rate": 2.5828878469513265e-06, - "loss": 0.7525, - "step": 1209 - }, - { - "epoch": 1.547314578005115, - "grad_norm": 3.1039762876325394, - "learning_rate": 2.5690136692817048e-06, - "loss": 0.7717, - "step": 1210 - }, - { - "epoch": 1.5485933503836318, - "grad_norm": 3.068330622187725, - "learning_rate": 2.555171361085751e-06, - "loss": 0.7043, - "step": 1211 - }, - { - "epoch": 1.5498721227621484, - "grad_norm": 2.8910371438837714, - "learning_rate": 2.541360981729343e-06, - "loss": 0.708, - "step": 1212 - }, - { - "epoch": 1.551150895140665, - "grad_norm": 2.813856916514164, - "learning_rate": 2.527582590441436e-06, - "loss": 0.6974, - "step": 1213 - }, - { - "epoch": 1.5524296675191815, - "grad_norm": 2.9531869469930836, - "learning_rate": 2.5138362463137964e-06, - "loss": 0.74, - "step": 1214 - }, - { - "epoch": 1.5537084398976981, - "grad_norm": 2.9854194419060356, - "learning_rate": 2.500122008300735e-06, - "loss": 0.7234, - "step": 1215 - }, - { - "epoch": 1.554987212276215, - "grad_norm": 2.8588343464020807, - "learning_rate": 2.4864399352188874e-06, - "loss": 0.7195, - "step": 1216 - }, - { - "epoch": 1.5562659846547313, - "grad_norm": 3.053074267256655, - "learning_rate": 2.4727900857469312e-06, - "loss": 0.7134, - "step": 1217 - }, - { - "epoch": 1.5575447570332481, - "grad_norm": 2.7588836665265775, - "learning_rate": 2.459172518425341e-06, - "loss": 0.667, - "step": 1218 - }, - { - "epoch": 1.5588235294117647, - "grad_norm": 2.843087349401971, - "learning_rate": 2.4455872916561584e-06, - "loss": 0.6824, - "step": 1219 - }, - { - "epoch": 1.5601023017902813, - "grad_norm": 3.419400663868211, - "learning_rate": 2.432034463702715e-06, - "loss": 0.7059, - "step": 1220 - }, - { - "epoch": 1.561381074168798, - "grad_norm": 2.8527819498959524, - "learning_rate": 2.4185140926893845e-06, - "loss": 0.6806, - "step": 1221 - }, - { - "epoch": 1.5626598465473145, - "grad_norm": 2.889515338822536, - "learning_rate": 2.40502623660136e-06, - "loss": 0.7426, - "step": 1222 - }, - { - "epoch": 1.5639386189258313, - "grad_norm": 2.951412389665457, - "learning_rate": 2.3915709532843766e-06, - "loss": 0.693, - "step": 1223 - }, - { - "epoch": 1.5652173913043477, - "grad_norm": 1.817600183555667, - "learning_rate": 2.378148300444467e-06, - "loss": 0.3929, - "step": 1224 - }, - { - "epoch": 1.5664961636828645, - "grad_norm": 1.7231829846912587, - "learning_rate": 2.364758335647738e-06, - "loss": 0.3686, - "step": 1225 - }, - { - "epoch": 1.567774936061381, - "grad_norm": 3.0420341175951227, - "learning_rate": 2.351401116320093e-06, - "loss": 0.6802, - "step": 1226 - }, - { - "epoch": 1.5690537084398977, - "grad_norm": 3.0970466039887325, - "learning_rate": 2.3380766997470017e-06, - "loss": 0.7283, - "step": 1227 - }, - { - "epoch": 1.5703324808184145, - "grad_norm": 3.0443570859173414, - "learning_rate": 2.3247851430732495e-06, - "loss": 0.7226, - "step": 1228 - }, - { - "epoch": 1.5716112531969308, - "grad_norm": 2.9024506725843477, - "learning_rate": 2.3115265033027066e-06, - "loss": 0.7084, - "step": 1229 - }, - { - "epoch": 1.5728900255754477, - "grad_norm": 2.9261059163401457, - "learning_rate": 2.2983008372980553e-06, - "loss": 0.7211, - "step": 1230 - }, - { - "epoch": 1.5741687979539642, - "grad_norm": 3.062765731072134, - "learning_rate": 2.2851082017805704e-06, - "loss": 0.7281, - "step": 1231 - }, - { - "epoch": 1.5754475703324808, - "grad_norm": 1.540383843039863, - "learning_rate": 2.2719486533298753e-06, - "loss": 0.3799, - "step": 1232 - }, - { - "epoch": 1.5767263427109974, - "grad_norm": 2.9257083439445624, - "learning_rate": 2.2588222483836743e-06, - "loss": 0.6846, - "step": 1233 - }, - { - "epoch": 1.578005115089514, - "grad_norm": 2.6724068480298793, - "learning_rate": 2.245729043237541e-06, - "loss": 0.6918, - "step": 1234 - }, - { - "epoch": 1.5792838874680308, - "grad_norm": 3.0634007139607653, - "learning_rate": 2.2326690940446684e-06, - "loss": 0.7213, - "step": 1235 - }, - { - "epoch": 1.5805626598465472, - "grad_norm": 2.7385021705761634, - "learning_rate": 2.2196424568156073e-06, - "loss": 0.6673, - "step": 1236 - }, - { - "epoch": 1.581841432225064, - "grad_norm": 1.8217216903700808, - "learning_rate": 2.206649187418053e-06, - "loss": 0.4354, - "step": 1237 - }, - { - "epoch": 1.5831202046035806, - "grad_norm": 3.2958105542571965, - "learning_rate": 2.1936893415766025e-06, - "loss": 0.814, - "step": 1238 - }, - { - "epoch": 1.5843989769820972, - "grad_norm": 2.77379210639995, - "learning_rate": 2.180762974872491e-06, - "loss": 0.7247, - "step": 1239 - }, - { - "epoch": 1.5856777493606138, - "grad_norm": 2.9879436541687587, - "learning_rate": 2.16787014274338e-06, - "loss": 0.7326, - "step": 1240 - }, - { - "epoch": 1.5869565217391304, - "grad_norm": 2.8901249600409407, - "learning_rate": 2.15501090048312e-06, - "loss": 0.7096, - "step": 1241 - }, - { - "epoch": 1.5882352941176472, - "grad_norm": 3.1882904311460116, - "learning_rate": 2.142185303241483e-06, - "loss": 0.6988, - "step": 1242 - }, - { - "epoch": 1.5895140664961636, - "grad_norm": 2.9660716085902528, - "learning_rate": 2.1293934060239595e-06, - "loss": 0.7567, - "step": 1243 - }, - { - "epoch": 1.5907928388746804, - "grad_norm": 2.9995473012505984, - "learning_rate": 2.1166352636915155e-06, - "loss": 0.717, - "step": 1244 - }, - { - "epoch": 1.592071611253197, - "grad_norm": 3.1631948443541527, - "learning_rate": 2.103910930960336e-06, - "loss": 0.7106, - "step": 1245 - }, - { - "epoch": 1.5933503836317136, - "grad_norm": 3.172476220461664, - "learning_rate": 2.091220462401612e-06, - "loss": 0.7656, - "step": 1246 - }, - { - "epoch": 1.5946291560102301, - "grad_norm": 3.0162880596441743, - "learning_rate": 2.0785639124413116e-06, - "loss": 0.7671, - "step": 1247 - }, - { - "epoch": 1.5959079283887467, - "grad_norm": 1.6412689711023924, - "learning_rate": 2.065941335359918e-06, - "loss": 0.4205, - "step": 1248 - }, - { - "epoch": 1.5971867007672635, - "grad_norm": 3.0030146095795134, - "learning_rate": 2.053352785292222e-06, - "loss": 0.7192, - "step": 1249 - }, - { - "epoch": 1.59846547314578, - "grad_norm": 2.8338880411729095, - "learning_rate": 2.040798316227085e-06, - "loss": 0.7292, - "step": 1250 - }, - { - "epoch": 1.5997442455242967, - "grad_norm": 3.034707087967203, - "learning_rate": 2.0282779820071973e-06, - "loss": 0.7358, - "step": 1251 - }, - { - "epoch": 1.6010230179028133, - "grad_norm": 1.6369508861071806, - "learning_rate": 2.015791836328861e-06, - "loss": 0.3712, - "step": 1252 - }, - { - "epoch": 1.60230179028133, - "grad_norm": 3.070520689438242, - "learning_rate": 2.0033399327417437e-06, - "loss": 0.7674, - "step": 1253 - }, - { - "epoch": 1.6035805626598465, - "grad_norm": 3.0737568681577314, - "learning_rate": 1.990922324648673e-06, - "loss": 0.7033, - "step": 1254 - }, - { - "epoch": 1.604859335038363, - "grad_norm": 3.0899351298245525, - "learning_rate": 1.978539065305376e-06, - "loss": 0.7333, - "step": 1255 - }, - { - "epoch": 1.60613810741688, - "grad_norm": 2.8286434914869676, - "learning_rate": 1.9661902078202744e-06, - "loss": 0.6831, - "step": 1256 - }, - { - "epoch": 1.6074168797953963, - "grad_norm": 2.68534750415217, - "learning_rate": 1.953875805154256e-06, - "loss": 0.659, - "step": 1257 - }, - { - "epoch": 1.608695652173913, - "grad_norm": 3.248015019642227, - "learning_rate": 1.9415959101204294e-06, - "loss": 0.7208, - "step": 1258 - }, - { - "epoch": 1.6099744245524297, - "grad_norm": 1.6319771580936209, - "learning_rate": 1.929350575383916e-06, - "loss": 0.3768, - "step": 1259 - }, - { - "epoch": 1.6112531969309463, - "grad_norm": 1.5113539777002745, - "learning_rate": 1.9171398534616214e-06, - "loss": 0.3675, - "step": 1260 - }, - { - "epoch": 1.612531969309463, - "grad_norm": 1.691761199033544, - "learning_rate": 1.9049637967219968e-06, - "loss": 0.3913, - "step": 1261 - }, - { - "epoch": 1.6138107416879794, - "grad_norm": 3.0401815637158602, - "learning_rate": 1.8928224573848265e-06, - "loss": 0.7054, - "step": 1262 - }, - { - "epoch": 1.6150895140664963, - "grad_norm": 1.6669421342861377, - "learning_rate": 1.880715887521013e-06, - "loss": 0.4009, - "step": 1263 - }, - { - "epoch": 1.6163682864450126, - "grad_norm": 2.9180486578198144, - "learning_rate": 1.8686441390523247e-06, - "loss": 0.7133, - "step": 1264 - }, - { - "epoch": 1.6176470588235294, - "grad_norm": 1.7092289161228402, - "learning_rate": 1.8566072637511968e-06, - "loss": 0.3836, - "step": 1265 - }, - { - "epoch": 1.618925831202046, - "grad_norm": 3.01330533487007, - "learning_rate": 1.8446053132405129e-06, - "loss": 0.6767, - "step": 1266 - }, - { - "epoch": 1.6202046035805626, - "grad_norm": 3.2009136810880565, - "learning_rate": 1.832638338993359e-06, - "loss": 0.7336, - "step": 1267 - }, - { - "epoch": 1.6214833759590794, - "grad_norm": 1.7889579829293831, - "learning_rate": 1.820706392332824e-06, - "loss": 0.3972, - "step": 1268 - }, - { - "epoch": 1.6227621483375958, - "grad_norm": 1.879863529726697, - "learning_rate": 1.808809524431775e-06, - "loss": 0.4087, - "step": 1269 - }, - { - "epoch": 1.6240409207161126, - "grad_norm": 2.927752128225438, - "learning_rate": 1.7969477863126327e-06, - "loss": 0.7101, - "step": 1270 - }, - { - "epoch": 1.6253196930946292, - "grad_norm": 3.060866830518965, - "learning_rate": 1.7851212288471575e-06, - "loss": 0.7145, - "step": 1271 - }, - { - "epoch": 1.6265984654731458, - "grad_norm": 2.6399632094121, - "learning_rate": 1.773329902756228e-06, - "loss": 0.6752, - "step": 1272 - }, - { - "epoch": 1.6278772378516624, - "grad_norm": 1.619344903132719, - "learning_rate": 1.7615738586096266e-06, - "loss": 0.3778, - "step": 1273 - }, - { - "epoch": 1.629156010230179, - "grad_norm": 2.9893698240549025, - "learning_rate": 1.7498531468258184e-06, - "loss": 0.6649, - "step": 1274 - }, - { - "epoch": 1.6304347826086958, - "grad_norm": 3.0904184619312685, - "learning_rate": 1.738167817671742e-06, - "loss": 0.6473, - "step": 1275 - }, - { - "epoch": 1.6317135549872122, - "grad_norm": 2.829473903244602, - "learning_rate": 1.726517921262586e-06, - "loss": 0.7206, - "step": 1276 - }, - { - "epoch": 1.632992327365729, - "grad_norm": 3.1127305680028767, - "learning_rate": 1.7149035075615795e-06, - "loss": 0.7302, - "step": 1277 - }, - { - "epoch": 1.6342710997442456, - "grad_norm": 2.7767027041427337, - "learning_rate": 1.7033246263797742e-06, - "loss": 0.6833, - "step": 1278 - }, - { - "epoch": 1.6355498721227621, - "grad_norm": 1.7390470019365556, - "learning_rate": 1.6917813273758331e-06, - "loss": 0.4119, - "step": 1279 - }, - { - "epoch": 1.6368286445012787, - "grad_norm": 2.75129241930074, - "learning_rate": 1.680273660055819e-06, - "loss": 0.6826, - "step": 1280 - }, - { - "epoch": 1.6381074168797953, - "grad_norm": 3.046158702769351, - "learning_rate": 1.6688016737729773e-06, - "loss": 0.7509, - "step": 1281 - }, - { - "epoch": 1.6393861892583121, - "grad_norm": 3.005370704596122, - "learning_rate": 1.657365417727529e-06, - "loss": 0.7094, - "step": 1282 - }, - { - "epoch": 1.6406649616368285, - "grad_norm": 2.8721564646606, - "learning_rate": 1.6459649409664568e-06, - "loss": 0.7348, - "step": 1283 - }, - { - "epoch": 1.6419437340153453, - "grad_norm": 2.9171237306064, - "learning_rate": 1.634600292383296e-06, - "loss": 0.7241, - "step": 1284 - }, - { - "epoch": 1.643222506393862, - "grad_norm": 3.1006316488180747, - "learning_rate": 1.623271520717925e-06, - "loss": 0.7279, - "step": 1285 - }, - { - "epoch": 1.6445012787723785, - "grad_norm": 3.0889689363918573, - "learning_rate": 1.6119786745563549e-06, - "loss": 0.7389, - "step": 1286 - }, - { - "epoch": 1.645780051150895, - "grad_norm": 1.6813443670820858, - "learning_rate": 1.6007218023305248e-06, - "loss": 0.3821, - "step": 1287 - }, - { - "epoch": 1.6470588235294117, - "grad_norm": 1.604419544006611, - "learning_rate": 1.5895009523180882e-06, - "loss": 0.4008, - "step": 1288 - }, - { - "epoch": 1.6483375959079285, - "grad_norm": 2.8318489634710886, - "learning_rate": 1.5783161726422102e-06, - "loss": 0.6606, - "step": 1289 - }, - { - "epoch": 1.6496163682864449, - "grad_norm": 2.9045871636925393, - "learning_rate": 1.5671675112713614e-06, - "loss": 0.717, - "step": 1290 - }, - { - "epoch": 1.6508951406649617, - "grad_norm": 3.132328779384499, - "learning_rate": 1.556055016019109e-06, - "loss": 0.6836, - "step": 1291 - }, - { - "epoch": 1.6521739130434783, - "grad_norm": 2.9947062564513267, - "learning_rate": 1.5449787345439137e-06, - "loss": 0.7559, - "step": 1292 - }, - { - "epoch": 1.6534526854219949, - "grad_norm": 1.5264687656078133, - "learning_rate": 1.533938714348928e-06, - "loss": 0.3626, - "step": 1293 - }, - { - "epoch": 1.6547314578005117, - "grad_norm": 2.781967917949177, - "learning_rate": 1.5229350027817847e-06, - "loss": 0.6399, - "step": 1294 - }, - { - "epoch": 1.656010230179028, - "grad_norm": 3.1510507119206013, - "learning_rate": 1.5119676470344037e-06, - "loss": 0.6656, - "step": 1295 - }, - { - "epoch": 1.6572890025575449, - "grad_norm": 3.167605212330853, - "learning_rate": 1.5010366941427823e-06, - "loss": 0.7506, - "step": 1296 - }, - { - "epoch": 1.6585677749360612, - "grad_norm": 1.896947727382689, - "learning_rate": 1.4901421909867952e-06, - "loss": 0.4097, - "step": 1297 - }, - { - "epoch": 1.659846547314578, - "grad_norm": 1.62226224180371, - "learning_rate": 1.4792841842899963e-06, - "loss": 0.4182, - "step": 1298 - }, - { - "epoch": 1.6611253196930946, - "grad_norm": 2.9987422878162002, - "learning_rate": 1.4684627206194135e-06, - "loss": 0.7332, - "step": 1299 - }, - { - "epoch": 1.6624040920716112, - "grad_norm": 2.856961469323562, - "learning_rate": 1.4576778463853546e-06, - "loss": 0.6915, - "step": 1300 - }, - { - "epoch": 1.663682864450128, - "grad_norm": 2.8603387577999873, - "learning_rate": 1.4469296078412032e-06, - "loss": 0.631, - "step": 1301 - }, - { - "epoch": 1.6649616368286444, - "grad_norm": 1.681938546697316, - "learning_rate": 1.4362180510832246e-06, - "loss": 0.3904, - "step": 1302 - }, - { - "epoch": 1.6662404092071612, - "grad_norm": 2.84791632507541, - "learning_rate": 1.4255432220503574e-06, - "loss": 0.7301, - "step": 1303 - }, - { - "epoch": 1.6675191815856778, - "grad_norm": 2.8041703546711094, - "learning_rate": 1.41490516652404e-06, - "loss": 0.7299, - "step": 1304 - }, - { - "epoch": 1.6687979539641944, - "grad_norm": 3.098405127185539, - "learning_rate": 1.4043039301279904e-06, - "loss": 0.7095, - "step": 1305 - }, - { - "epoch": 1.670076726342711, - "grad_norm": 3.203097120874668, - "learning_rate": 1.3937395583280133e-06, - "loss": 0.75, - "step": 1306 - }, - { - "epoch": 1.6713554987212276, - "grad_norm": 3.02878888864889, - "learning_rate": 1.3832120964318252e-06, - "loss": 0.6515, - "step": 1307 - }, - { - "epoch": 1.6726342710997444, - "grad_norm": 3.1682607624257932, - "learning_rate": 1.372721589588839e-06, - "loss": 0.7171, - "step": 1308 - }, - { - "epoch": 1.6739130434782608, - "grad_norm": 2.843182569593337, - "learning_rate": 1.3622680827899693e-06, - "loss": 0.7334, - "step": 1309 - }, - { - "epoch": 1.6751918158567776, - "grad_norm": 1.5860048870838965, - "learning_rate": 1.3518516208674638e-06, - "loss": 0.3953, - "step": 1310 - }, - { - "epoch": 1.6764705882352942, - "grad_norm": 2.9853695517747547, - "learning_rate": 1.3414722484946862e-06, - "loss": 0.6454, - "step": 1311 - }, - { - "epoch": 1.6777493606138107, - "grad_norm": 2.968242624367223, - "learning_rate": 1.331130010185928e-06, - "loss": 0.6812, - "step": 1312 - }, - { - "epoch": 1.6790281329923273, - "grad_norm": 2.8575948412666663, - "learning_rate": 1.3208249502962345e-06, - "loss": 0.6792, - "step": 1313 - }, - { - "epoch": 1.680306905370844, - "grad_norm": 2.9326623023342044, - "learning_rate": 1.310557113021196e-06, - "loss": 0.7067, - "step": 1314 - }, - { - "epoch": 1.6815856777493607, - "grad_norm": 1.673243710255272, - "learning_rate": 1.3003265423967615e-06, - "loss": 0.3715, - "step": 1315 - }, - { - "epoch": 1.682864450127877, - "grad_norm": 2.966133523607717, - "learning_rate": 1.2901332822990632e-06, - "loss": 0.6846, - "step": 1316 - }, - { - "epoch": 1.684143222506394, - "grad_norm": 2.8581018556108653, - "learning_rate": 1.2799773764442136e-06, - "loss": 0.6749, - "step": 1317 - }, - { - "epoch": 1.6854219948849105, - "grad_norm": 1.8057811062250093, - "learning_rate": 1.2698588683881185e-06, - "loss": 0.4012, - "step": 1318 - }, - { - "epoch": 1.686700767263427, - "grad_norm": 1.87561633142222, - "learning_rate": 1.259777801526303e-06, - "loss": 0.4226, - "step": 1319 - }, - { - "epoch": 1.6879795396419437, - "grad_norm": 1.7226992344630752, - "learning_rate": 1.2497342190937155e-06, - "loss": 0.3767, - "step": 1320 - }, - { - "epoch": 1.6892583120204603, - "grad_norm": 3.2022662342745405, - "learning_rate": 1.2397281641645364e-06, - "loss": 0.6672, - "step": 1321 - }, - { - "epoch": 1.690537084398977, - "grad_norm": 1.6889176109886446, - "learning_rate": 1.2297596796520062e-06, - "loss": 0.3886, - "step": 1322 - }, - { - "epoch": 1.6918158567774935, - "grad_norm": 1.5565471262913477, - "learning_rate": 1.2198288083082432e-06, - "loss": 0.3691, - "step": 1323 - }, - { - "epoch": 1.6930946291560103, - "grad_norm": 2.9937331786160306, - "learning_rate": 1.2099355927240397e-06, - "loss": 0.7139, - "step": 1324 - }, - { - "epoch": 1.6943734015345269, - "grad_norm": 1.8097496280725405, - "learning_rate": 1.200080075328699e-06, - "loss": 0.3635, - "step": 1325 - }, - { - "epoch": 1.6956521739130435, - "grad_norm": 2.981751859713283, - "learning_rate": 1.1902622983898527e-06, - "loss": 0.6862, - "step": 1326 - }, - { - "epoch": 1.69693094629156, - "grad_norm": 2.8673927874506457, - "learning_rate": 1.180482304013264e-06, - "loss": 0.6352, - "step": 1327 - }, - { - "epoch": 1.6982097186700766, - "grad_norm": 2.9324329034264647, - "learning_rate": 1.1707401341426594e-06, - "loss": 0.7413, - "step": 1328 - }, - { - "epoch": 1.6994884910485935, - "grad_norm": 3.098340427362859, - "learning_rate": 1.1610358305595548e-06, - "loss": 0.6912, - "step": 1329 - }, - { - "epoch": 1.7007672634271098, - "grad_norm": 1.6089002546118145, - "learning_rate": 1.1513694348830572e-06, - "loss": 0.3326, - "step": 1330 - }, - { - "epoch": 1.7020460358056266, - "grad_norm": 2.7854907319177014, - "learning_rate": 1.1417409885696996e-06, - "loss": 0.6941, - "step": 1331 - }, - { - "epoch": 1.7033248081841432, - "grad_norm": 2.8991233190940466, - "learning_rate": 1.1321505329132687e-06, - "loss": 0.7235, - "step": 1332 - }, - { - "epoch": 1.7046035805626598, - "grad_norm": 3.1219032002945473, - "learning_rate": 1.122598109044608e-06, - "loss": 0.7963, - "step": 1333 - }, - { - "epoch": 1.7058823529411766, - "grad_norm": 3.051775103519699, - "learning_rate": 1.1130837579314568e-06, - "loss": 0.6722, - "step": 1334 - }, - { - "epoch": 1.707161125319693, - "grad_norm": 2.922884879438813, - "learning_rate": 1.1036075203782782e-06, - "loss": 0.7099, - "step": 1335 - }, - { - "epoch": 1.7084398976982098, - "grad_norm": 2.650131815283204, - "learning_rate": 1.0941694370260658e-06, - "loss": 0.6947, - "step": 1336 - }, - { - "epoch": 1.7097186700767262, - "grad_norm": 1.645468935889948, - "learning_rate": 1.0847695483521836e-06, - "loss": 0.426, - "step": 1337 - }, - { - "epoch": 1.710997442455243, - "grad_norm": 2.997261169455876, - "learning_rate": 1.0754078946701974e-06, - "loss": 0.7098, - "step": 1338 - }, - { - "epoch": 1.7122762148337596, - "grad_norm": 3.067876986540899, - "learning_rate": 1.0660845161296807e-06, - "loss": 0.6796, - "step": 1339 - }, - { - "epoch": 1.7135549872122762, - "grad_norm": 1.6334159868615312, - "learning_rate": 1.0567994527160619e-06, - "loss": 0.3959, - "step": 1340 - }, - { - "epoch": 1.714833759590793, - "grad_norm": 2.657001476202506, - "learning_rate": 1.047552744250444e-06, - "loss": 0.6794, - "step": 1341 - }, - { - "epoch": 1.7161125319693094, - "grad_norm": 2.923707707468093, - "learning_rate": 1.0383444303894453e-06, - "loss": 0.7124, - "step": 1342 - }, - { - "epoch": 1.7173913043478262, - "grad_norm": 2.904806530048361, - "learning_rate": 1.0291745506250051e-06, - "loss": 0.6696, - "step": 1343 - }, - { - "epoch": 1.7186700767263428, - "grad_norm": 1.617948373717517, - "learning_rate": 1.0200431442842363e-06, - "loss": 0.3475, - "step": 1344 - }, - { - "epoch": 1.7199488491048593, - "grad_norm": 3.0329197075859886, - "learning_rate": 1.0109502505292568e-06, - "loss": 0.7206, - "step": 1345 - }, - { - "epoch": 1.721227621483376, - "grad_norm": 1.6676967030349383, - "learning_rate": 1.0018959083570024e-06, - "loss": 0.3897, - "step": 1346 - }, - { - "epoch": 1.7225063938618925, - "grad_norm": 2.87871664827466, - "learning_rate": 9.928801565990775e-07, - "loss": 0.7168, - "step": 1347 - }, - { - "epoch": 1.7237851662404093, - "grad_norm": 3.0064367846927986, - "learning_rate": 9.83903033921586e-07, - "loss": 0.7108, - "step": 1348 - }, - { - "epoch": 1.7250639386189257, - "grad_norm": 2.7110391776022165, - "learning_rate": 9.749645788249561e-07, - "loss": 0.6542, - "step": 1349 - }, - { - "epoch": 1.7263427109974425, - "grad_norm": 3.057701327776003, - "learning_rate": 9.660648296437814e-07, - "loss": 0.7271, - "step": 1350 - }, - { - "epoch": 1.727621483375959, - "grad_norm": 3.092373986144162, - "learning_rate": 9.572038245466664e-07, - "loss": 0.6644, - "step": 1351 - }, - { - "epoch": 1.7289002557544757, - "grad_norm": 2.9809760723132577, - "learning_rate": 9.483816015360381e-07, - "loss": 0.7257, - "step": 1352 - }, - { - "epoch": 1.7301790281329923, - "grad_norm": 2.88939086141906, - "learning_rate": 9.395981984480051e-07, - "loss": 0.6686, - "step": 1353 - }, - { - "epoch": 1.7314578005115089, - "grad_norm": 1.6111977719424015, - "learning_rate": 9.308536529521938e-07, - "loss": 0.3775, - "step": 1354 - }, - { - "epoch": 1.7327365728900257, - "grad_norm": 2.8388921048755855, - "learning_rate": 9.221480025515694e-07, - "loss": 0.7462, - "step": 1355 - }, - { - "epoch": 1.734015345268542, - "grad_norm": 3.2706080652910927, - "learning_rate": 9.134812845822915e-07, - "loss": 0.7417, - "step": 1356 - }, - { - "epoch": 1.7352941176470589, - "grad_norm": 1.7584052543682978, - "learning_rate": 9.048535362135546e-07, - "loss": 0.4601, - "step": 1357 - }, - { - "epoch": 1.7365728900255755, - "grad_norm": 2.8217133400554832, - "learning_rate": 8.962647944474145e-07, - "loss": 0.7294, - "step": 1358 - }, - { - "epoch": 1.737851662404092, - "grad_norm": 2.7996911712810104, - "learning_rate": 8.87715096118642e-07, - "loss": 0.6724, - "step": 1359 - }, - { - "epoch": 1.7391304347826086, - "grad_norm": 3.0003335506835254, - "learning_rate": 8.792044778945652e-07, - "loss": 0.748, - "step": 1360 - }, - { - "epoch": 1.7404092071611252, - "grad_norm": 2.9836627481539333, - "learning_rate": 8.707329762749017e-07, - "loss": 0.6973, - "step": 1361 - }, - { - "epoch": 1.741687979539642, - "grad_norm": 2.894574135558406, - "learning_rate": 8.623006275916102e-07, - "loss": 0.687, - "step": 1362 - }, - { - "epoch": 1.7429667519181584, - "grad_norm": 3.0506909054518845, - "learning_rate": 8.539074680087367e-07, - "loss": 0.7472, - "step": 1363 - }, - { - "epoch": 1.7442455242966752, - "grad_norm": 2.7741211626272446, - "learning_rate": 8.45553533522252e-07, - "loss": 0.6692, - "step": 1364 - }, - { - "epoch": 1.7455242966751918, - "grad_norm": 1.6970470275289637, - "learning_rate": 8.372388599599046e-07, - "loss": 0.3725, - "step": 1365 - }, - { - "epoch": 1.7468030690537084, - "grad_norm": 3.07310311813446, - "learning_rate": 8.2896348298106e-07, - "loss": 0.7061, - "step": 1366 - }, - { - "epoch": 1.7480818414322252, - "grad_norm": 3.0874764424272287, - "learning_rate": 8.207274380765529e-07, - "loss": 0.6656, - "step": 1367 - }, - { - "epoch": 1.7493606138107416, - "grad_norm": 2.9461733740054243, - "learning_rate": 8.125307605685351e-07, - "loss": 0.7306, - "step": 1368 - }, - { - "epoch": 1.7506393861892584, - "grad_norm": 2.8272826206113235, - "learning_rate": 8.043734856103191e-07, - "loss": 0.6791, - "step": 1369 - }, - { - "epoch": 1.7519181585677748, - "grad_norm": 2.889124788053903, - "learning_rate": 7.962556481862338e-07, - "loss": 0.6798, - "step": 1370 - }, - { - "epoch": 1.7531969309462916, - "grad_norm": 3.1663462968417773, - "learning_rate": 7.881772831114697e-07, - "loss": 0.7226, - "step": 1371 - }, - { - "epoch": 1.7544757033248082, - "grad_norm": 1.7289641145180044, - "learning_rate": 7.80138425031931e-07, - "loss": 0.3969, - "step": 1372 - }, - { - "epoch": 1.7557544757033248, - "grad_norm": 1.729719563157422, - "learning_rate": 7.721391084240881e-07, - "loss": 0.3968, - "step": 1373 - }, - { - "epoch": 1.7570332480818416, - "grad_norm": 2.7885078854446688, - "learning_rate": 7.641793675948272e-07, - "loss": 0.6371, - "step": 1374 - }, - { - "epoch": 1.758312020460358, - "grad_norm": 1.736638225038752, - "learning_rate": 7.56259236681306e-07, - "loss": 0.4255, - "step": 1375 - }, - { - "epoch": 1.7595907928388748, - "grad_norm": 2.7429037915306465, - "learning_rate": 7.483787496508065e-07, - "loss": 0.6434, - "step": 1376 - }, - { - "epoch": 1.7608695652173914, - "grad_norm": 3.0269359411182495, - "learning_rate": 7.405379403005875e-07, - "loss": 0.6659, - "step": 1377 - }, - { - "epoch": 1.762148337595908, - "grad_norm": 2.9121863158475225, - "learning_rate": 7.32736842257743e-07, - "loss": 0.7307, - "step": 1378 - }, - { - "epoch": 1.7634271099744245, - "grad_norm": 1.7701511124205984, - "learning_rate": 7.249754889790539e-07, - "loss": 0.4, - "step": 1379 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 1.6097448709176725, - "learning_rate": 7.172539137508472e-07, - "loss": 0.3939, - "step": 1380 - }, - { - "epoch": 1.765984654731458, - "grad_norm": 2.9183117600189576, - "learning_rate": 7.095721496888541e-07, - "loss": 0.6776, - "step": 1381 - }, - { - "epoch": 1.7672634271099743, - "grad_norm": 2.9744039364226076, - "learning_rate": 7.019302297380659e-07, - "loss": 0.6956, - "step": 1382 - }, - { - "epoch": 1.7685421994884911, - "grad_norm": 2.838610773516852, - "learning_rate": 6.943281866725915e-07, - "loss": 0.6993, - "step": 1383 - }, - { - "epoch": 1.7698209718670077, - "grad_norm": 2.9375168530114877, - "learning_rate": 6.867660530955211e-07, - "loss": 0.7213, - "step": 1384 - }, - { - "epoch": 1.7710997442455243, - "grad_norm": 2.875896491332056, - "learning_rate": 6.792438614387842e-07, - "loss": 0.7147, - "step": 1385 - }, - { - "epoch": 1.772378516624041, - "grad_norm": 1.634431639376817, - "learning_rate": 6.717616439630071e-07, - "loss": 0.412, - "step": 1386 - }, - { - "epoch": 1.7736572890025575, - "grad_norm": 2.8170750901926787, - "learning_rate": 6.64319432757381e-07, - "loss": 0.6994, - "step": 1387 - }, - { - "epoch": 1.7749360613810743, - "grad_norm": 2.7915501268573255, - "learning_rate": 6.569172597395202e-07, - "loss": 0.702, - "step": 1388 - }, - { - "epoch": 1.7762148337595907, - "grad_norm": 2.9667390497927966, - "learning_rate": 6.495551566553249e-07, - "loss": 0.6881, - "step": 1389 - }, - { - "epoch": 1.7774936061381075, - "grad_norm": 3.084617638315583, - "learning_rate": 6.422331550788486e-07, - "loss": 0.7391, - "step": 1390 - }, - { - "epoch": 1.778772378516624, - "grad_norm": 3.0479036382532088, - "learning_rate": 6.349512864121588e-07, - "loss": 0.68, - "step": 1391 - }, - { - "epoch": 1.7800511508951407, - "grad_norm": 2.759083114766142, - "learning_rate": 6.277095818852031e-07, - "loss": 0.685, - "step": 1392 - }, - { - "epoch": 1.7813299232736572, - "grad_norm": 2.9933313804095074, - "learning_rate": 6.205080725556778e-07, - "loss": 0.669, - "step": 1393 - }, - { - "epoch": 1.7826086956521738, - "grad_norm": 2.9581016895582213, - "learning_rate": 6.133467893088929e-07, - "loss": 0.6834, - "step": 1394 - }, - { - "epoch": 1.7838874680306906, - "grad_norm": 2.8545261958146835, - "learning_rate": 6.062257628576396e-07, - "loss": 0.6863, - "step": 1395 - }, - { - "epoch": 1.785166240409207, - "grad_norm": 1.587562386413372, - "learning_rate": 5.99145023742057e-07, - "loss": 0.3985, - "step": 1396 - }, - { - "epoch": 1.7864450127877238, - "grad_norm": 2.896788442059605, - "learning_rate": 5.921046023295018e-07, - "loss": 0.7027, - "step": 1397 - }, - { - "epoch": 1.7877237851662404, - "grad_norm": 2.870951695176972, - "learning_rate": 5.851045288144253e-07, - "loss": 0.7114, - "step": 1398 - }, - { - "epoch": 1.789002557544757, - "grad_norm": 3.2639958334949752, - "learning_rate": 5.781448332182338e-07, - "loss": 0.7717, - "step": 1399 - }, - { - "epoch": 1.7902813299232738, - "grad_norm": 3.0225816513402317, - "learning_rate": 5.71225545389158e-07, - "loss": 0.723, - "step": 1400 - }, - { - "epoch": 1.7915601023017902, - "grad_norm": 3.2273235515158354, - "learning_rate": 5.643466950021426e-07, - "loss": 0.7795, - "step": 1401 - }, - { - "epoch": 1.792838874680307, - "grad_norm": 2.9997661582005235, - "learning_rate": 5.575083115586977e-07, - "loss": 0.6991, - "step": 1402 - }, - { - "epoch": 1.7941176470588234, - "grad_norm": 2.931644216741438, - "learning_rate": 5.507104243867834e-07, - "loss": 0.6679, - "step": 1403 - }, - { - "epoch": 1.7953964194373402, - "grad_norm": 2.9808278432994797, - "learning_rate": 5.439530626406874e-07, - "loss": 0.6866, - "step": 1404 - }, - { - "epoch": 1.7966751918158568, - "grad_norm": 2.891825814812659, - "learning_rate": 5.372362553008903e-07, - "loss": 0.6951, - "step": 1405 - }, - { - "epoch": 1.7979539641943734, - "grad_norm": 3.1026431260478793, - "learning_rate": 5.305600311739434e-07, - "loss": 0.6873, - "step": 1406 - }, - { - "epoch": 1.7992327365728902, - "grad_norm": 1.605333249265033, - "learning_rate": 5.239244188923554e-07, - "loss": 0.3623, - "step": 1407 - }, - { - "epoch": 1.8005115089514065, - "grad_norm": 2.8155991923494175, - "learning_rate": 5.173294469144574e-07, - "loss": 0.6542, - "step": 1408 - }, - { - "epoch": 1.8017902813299234, - "grad_norm": 3.3454409891377757, - "learning_rate": 5.107751435242802e-07, - "loss": 0.7535, - "step": 1409 - }, - { - "epoch": 1.80306905370844, - "grad_norm": 2.8217600828950804, - "learning_rate": 5.042615368314497e-07, - "loss": 0.6866, - "step": 1410 - }, - { - "epoch": 1.8043478260869565, - "grad_norm": 3.1995454545195328, - "learning_rate": 4.977886547710464e-07, - "loss": 0.6925, - "step": 1411 - }, - { - "epoch": 1.8056265984654731, - "grad_norm": 2.8450054527170505, - "learning_rate": 4.913565251034935e-07, - "loss": 0.7125, - "step": 1412 - }, - { - "epoch": 1.8069053708439897, - "grad_norm": 2.8797755558939278, - "learning_rate": 4.849651754144446e-07, - "loss": 0.656, - "step": 1413 - }, - { - "epoch": 1.8081841432225065, - "grad_norm": 1.765340469009311, - "learning_rate": 4.786146331146557e-07, - "loss": 0.4289, - "step": 1414 - }, - { - "epoch": 1.809462915601023, - "grad_norm": 3.1289465898511963, - "learning_rate": 4.7230492543986705e-07, - "loss": 0.7518, - "step": 1415 - }, - { - "epoch": 1.8107416879795397, - "grad_norm": 1.6838500997749961, - "learning_rate": 4.660360794506946e-07, - "loss": 0.4004, - "step": 1416 - }, - { - "epoch": 1.8120204603580563, - "grad_norm": 1.7462182707100071, - "learning_rate": 4.5980812203251236e-07, - "loss": 0.413, - "step": 1417 - }, - { - "epoch": 1.813299232736573, - "grad_norm": 2.7185488821638866, - "learning_rate": 4.5362107989532775e-07, - "loss": 0.6754, - "step": 1418 - }, - { - "epoch": 1.8145780051150895, - "grad_norm": 2.7094188561326473, - "learning_rate": 4.474749795736777e-07, - "loss": 0.6166, - "step": 1419 - }, - { - "epoch": 1.815856777493606, - "grad_norm": 2.963308470842208, - "learning_rate": 4.4136984742651266e-07, - "loss": 0.666, - "step": 1420 - }, - { - "epoch": 1.817135549872123, - "grad_norm": 2.990630179856055, - "learning_rate": 4.353057096370761e-07, - "loss": 0.6849, - "step": 1421 - }, - { - "epoch": 1.8184143222506393, - "grad_norm": 1.7114203887662542, - "learning_rate": 4.292825922128019e-07, - "loss": 0.384, - "step": 1422 - }, - { - "epoch": 1.819693094629156, - "grad_norm": 2.859028636642258, - "learning_rate": 4.2330052098520035e-07, - "loss": 0.7013, - "step": 1423 - }, - { - "epoch": 1.8209718670076727, - "grad_norm": 2.737840252254379, - "learning_rate": 4.1735952160974036e-07, - "loss": 0.7177, - "step": 1424 - }, - { - "epoch": 1.8222506393861893, - "grad_norm": 3.0038006006588995, - "learning_rate": 4.114596195657483e-07, - "loss": 0.7454, - "step": 1425 - }, - { - "epoch": 1.8235294117647058, - "grad_norm": 2.8453105202142197, - "learning_rate": 4.056008401562972e-07, - "loss": 0.6684, - "step": 1426 - }, - { - "epoch": 1.8248081841432224, - "grad_norm": 2.895596238696507, - "learning_rate": 3.9978320850809217e-07, - "loss": 0.6657, - "step": 1427 - }, - { - "epoch": 1.8260869565217392, - "grad_norm": 2.9468684530678, - "learning_rate": 3.940067495713673e-07, - "loss": 0.6981, - "step": 1428 - }, - { - "epoch": 1.8273657289002556, - "grad_norm": 3.060351086805715, - "learning_rate": 3.8827148811978467e-07, - "loss": 0.6598, - "step": 1429 - }, - { - "epoch": 1.8286445012787724, - "grad_norm": 3.1815378814615016, - "learning_rate": 3.8257744875031087e-07, - "loss": 0.7315, - "step": 1430 - }, - { - "epoch": 1.829923273657289, - "grad_norm": 1.7642274407974106, - "learning_rate": 3.7692465588312964e-07, - "loss": 0.3856, - "step": 1431 - }, - { - "epoch": 1.8312020460358056, - "grad_norm": 2.922227467808783, - "learning_rate": 3.7131313376152835e-07, - "loss": 0.684, - "step": 1432 - }, - { - "epoch": 1.8324808184143222, - "grad_norm": 1.743469593820137, - "learning_rate": 3.65742906451797e-07, - "loss": 0.3505, - "step": 1433 - }, - { - "epoch": 1.8337595907928388, - "grad_norm": 2.6776821805856246, - "learning_rate": 3.602139978431174e-07, - "loss": 0.6375, - "step": 1434 - }, - { - "epoch": 1.8350383631713556, - "grad_norm": 2.9139170872509212, - "learning_rate": 3.547264316474708e-07, - "loss": 0.621, - "step": 1435 - }, - { - "epoch": 1.836317135549872, - "grad_norm": 1.8128238431523116, - "learning_rate": 3.492802313995358e-07, - "loss": 0.3972, - "step": 1436 - }, - { - "epoch": 1.8375959079283888, - "grad_norm": 2.9914855363528, - "learning_rate": 3.438754204565764e-07, - "loss": 0.7161, - "step": 1437 - }, - { - "epoch": 1.8388746803069054, - "grad_norm": 2.9355547402350073, - "learning_rate": 3.3851202199835173e-07, - "loss": 0.6848, - "step": 1438 - }, - { - "epoch": 1.840153452685422, - "grad_norm": 1.5918493866039585, - "learning_rate": 3.3319005902702097e-07, - "loss": 0.3908, - "step": 1439 - }, - { - "epoch": 1.8414322250639388, - "grad_norm": 3.1173803295429123, - "learning_rate": 3.279095543670252e-07, - "loss": 0.7224, - "step": 1440 - }, - { - "epoch": 1.8427109974424551, - "grad_norm": 2.5842725263563695, - "learning_rate": 3.226705306650113e-07, - "loss": 0.5906, - "step": 1441 - }, - { - "epoch": 1.843989769820972, - "grad_norm": 3.27620860834179, - "learning_rate": 3.17473010389725e-07, - "loss": 0.7088, - "step": 1442 - }, - { - "epoch": 1.8452685421994883, - "grad_norm": 1.7566505891220947, - "learning_rate": 3.1231701583190997e-07, - "loss": 0.3979, - "step": 1443 - }, - { - "epoch": 1.8465473145780051, - "grad_norm": 1.5290857214778566, - "learning_rate": 3.072025691042213e-07, - "loss": 0.385, - "step": 1444 - }, - { - "epoch": 1.8478260869565217, - "grad_norm": 2.9033338658108048, - "learning_rate": 3.021296921411276e-07, - "loss": 0.7071, - "step": 1445 - }, - { - "epoch": 1.8491048593350383, - "grad_norm": 3.0655738229123592, - "learning_rate": 2.9709840669881364e-07, - "loss": 0.7196, - "step": 1446 - }, - { - "epoch": 1.8503836317135551, - "grad_norm": 3.059970777656809, - "learning_rate": 2.921087343550899e-07, - "loss": 0.7142, - "step": 1447 - }, - { - "epoch": 1.8516624040920715, - "grad_norm": 3.0345912057014695, - "learning_rate": 2.8716069650930325e-07, - "loss": 0.7189, - "step": 1448 - }, - { - "epoch": 1.8529411764705883, - "grad_norm": 3.042734445306754, - "learning_rate": 2.8225431438223427e-07, - "loss": 0.7182, - "step": 1449 - }, - { - "epoch": 1.854219948849105, - "grad_norm": 3.09151567870194, - "learning_rate": 2.7738960901601886e-07, - "loss": 0.7452, - "step": 1450 - }, - { - "epoch": 1.8554987212276215, - "grad_norm": 2.8977354908449953, - "learning_rate": 2.7256660127405356e-07, - "loss": 0.6887, - "step": 1451 - }, - { - "epoch": 1.856777493606138, - "grad_norm": 2.695269917224397, - "learning_rate": 2.677853118409024e-07, - "loss": 0.6862, - "step": 1452 - }, - { - "epoch": 1.8580562659846547, - "grad_norm": 2.9386905663248064, - "learning_rate": 2.6304576122221035e-07, - "loss": 0.6512, - "step": 1453 - }, - { - "epoch": 1.8593350383631715, - "grad_norm": 1.7776102868516062, - "learning_rate": 2.5834796974461785e-07, - "loss": 0.3765, - "step": 1454 - }, - { - "epoch": 1.8606138107416879, - "grad_norm": 3.0260212664158974, - "learning_rate": 2.5369195755567177e-07, - "loss": 0.7603, - "step": 1455 - }, - { - "epoch": 1.8618925831202047, - "grad_norm": 2.918936486611692, - "learning_rate": 2.490777446237391e-07, - "loss": 0.6918, - "step": 1456 - }, - { - "epoch": 1.8631713554987213, - "grad_norm": 3.119953673866332, - "learning_rate": 2.4450535073792026e-07, - "loss": 0.752, - "step": 1457 - }, - { - "epoch": 1.8644501278772379, - "grad_norm": 2.840275868437308, - "learning_rate": 2.399747955079645e-07, - "loss": 0.6608, - "step": 1458 - }, - { - "epoch": 1.8657289002557544, - "grad_norm": 2.891303843435713, - "learning_rate": 2.3548609836418823e-07, - "loss": 0.6737, - "step": 1459 - }, - { - "epoch": 1.867007672634271, - "grad_norm": 2.9155584392101486, - "learning_rate": 2.3103927855738896e-07, - "loss": 0.6945, - "step": 1460 - }, - { - "epoch": 1.8682864450127878, - "grad_norm": 2.8099262783452943, - "learning_rate": 2.2663435515876575e-07, - "loss": 0.6953, - "step": 1461 - }, - { - "epoch": 1.8695652173913042, - "grad_norm": 2.7662391718697745, - "learning_rate": 2.2227134705983145e-07, - "loss": 0.6671, - "step": 1462 - }, - { - "epoch": 1.870843989769821, - "grad_norm": 2.8058495295778627, - "learning_rate": 2.1795027297233818e-07, - "loss": 0.6896, - "step": 1463 - }, - { - "epoch": 1.8721227621483376, - "grad_norm": 2.883087325110408, - "learning_rate": 2.1367115142819527e-07, - "loss": 0.6742, - "step": 1464 - }, - { - "epoch": 1.8734015345268542, - "grad_norm": 3.1073034331613703, - "learning_rate": 2.0943400077938826e-07, - "loss": 0.7508, - "step": 1465 - }, - { - "epoch": 1.8746803069053708, - "grad_norm": 2.8748600771534165, - "learning_rate": 2.0523883919789877e-07, - "loss": 0.7011, - "step": 1466 - }, - { - "epoch": 1.8759590792838874, - "grad_norm": 3.212023131776242, - "learning_rate": 2.0108568467563149e-07, - "loss": 0.709, - "step": 1467 - }, - { - "epoch": 1.8772378516624042, - "grad_norm": 3.1693829675037115, - "learning_rate": 1.9697455502433515e-07, - "loss": 0.7158, - "step": 1468 - }, - { - "epoch": 1.8785166240409206, - "grad_norm": 2.990958360931792, - "learning_rate": 1.9290546787552046e-07, - "loss": 0.7267, - "step": 1469 - }, - { - "epoch": 1.8797953964194374, - "grad_norm": 2.794028187186831, - "learning_rate": 1.8887844068039453e-07, - "loss": 0.7454, - "step": 1470 - }, - { - "epoch": 1.881074168797954, - "grad_norm": 2.9359597554450887, - "learning_rate": 1.8489349070977768e-07, - "loss": 0.6296, - "step": 1471 - }, - { - "epoch": 1.8823529411764706, - "grad_norm": 2.671291960926189, - "learning_rate": 1.8095063505403466e-07, - "loss": 0.6934, - "step": 1472 - }, - { - "epoch": 1.8836317135549874, - "grad_norm": 3.0350697178977235, - "learning_rate": 1.7704989062299783e-07, - "loss": 0.6736, - "step": 1473 - }, - { - "epoch": 1.8849104859335037, - "grad_norm": 2.9657881819628638, - "learning_rate": 1.7319127414589409e-07, - "loss": 0.7015, - "step": 1474 - }, - { - "epoch": 1.8861892583120206, - "grad_norm": 2.8842316481767716, - "learning_rate": 1.6937480217127932e-07, - "loss": 0.6941, - "step": 1475 - }, - { - "epoch": 1.887468030690537, - "grad_norm": 3.1073074388688586, - "learning_rate": 1.6560049106696064e-07, - "loss": 0.6859, - "step": 1476 - }, - { - "epoch": 1.8887468030690537, - "grad_norm": 2.924852660781176, - "learning_rate": 1.6186835701992865e-07, - "loss": 0.6571, - "step": 1477 - }, - { - "epoch": 1.8900255754475703, - "grad_norm": 3.016446592045985, - "learning_rate": 1.581784160362887e-07, - "loss": 0.7513, - "step": 1478 - }, - { - "epoch": 1.891304347826087, - "grad_norm": 2.9826266237196757, - "learning_rate": 1.5453068394118975e-07, - "loss": 0.7152, - "step": 1479 - }, - { - "epoch": 1.8925831202046037, - "grad_norm": 2.9502729554147584, - "learning_rate": 1.5092517637876226e-07, - "loss": 0.6872, - "step": 1480 - }, - { - "epoch": 1.89386189258312, - "grad_norm": 1.7944882378588571, - "learning_rate": 1.473619088120426e-07, - "loss": 0.3936, - "step": 1481 - }, - { - "epoch": 1.895140664961637, - "grad_norm": 2.841024838571277, - "learning_rate": 1.4384089652291544e-07, - "loss": 0.6629, - "step": 1482 - }, - { - "epoch": 1.8964194373401535, - "grad_norm": 3.10021478753817, - "learning_rate": 1.403621546120415e-07, - "loss": 0.7554, - "step": 1483 - }, - { - "epoch": 1.89769820971867, - "grad_norm": 1.5847682037371111, - "learning_rate": 1.3692569799879428e-07, - "loss": 0.3837, - "step": 1484 - }, - { - "epoch": 1.8989769820971867, - "grad_norm": 1.462180613340267, - "learning_rate": 1.335315414212024e-07, - "loss": 0.351, - "step": 1485 - }, - { - "epoch": 1.9002557544757033, - "grad_norm": 1.4656583732346828, - "learning_rate": 1.3017969943587504e-07, - "loss": 0.3542, - "step": 1486 - }, - { - "epoch": 1.90153452685422, - "grad_norm": 2.820849586971327, - "learning_rate": 1.268701864179489e-07, - "loss": 0.6905, - "step": 1487 - }, - { - "epoch": 1.9028132992327365, - "grad_norm": 2.890649292184604, - "learning_rate": 1.2360301656102248e-07, - "loss": 0.6623, - "step": 1488 - }, - { - "epoch": 1.9040920716112533, - "grad_norm": 3.0000721425905206, - "learning_rate": 1.203782038770973e-07, - "loss": 0.6938, - "step": 1489 - }, - { - "epoch": 1.9053708439897699, - "grad_norm": 2.9828056634978917, - "learning_rate": 1.1719576219651585e-07, - "loss": 0.7128, - "step": 1490 - }, - { - "epoch": 1.9066496163682864, - "grad_norm": 3.098434525527747, - "learning_rate": 1.1405570516789922e-07, - "loss": 0.7218, - "step": 1491 - }, - { - "epoch": 1.907928388746803, - "grad_norm": 1.7395782889672162, - "learning_rate": 1.1095804625809837e-07, - "loss": 0.3986, - "step": 1492 - }, - { - "epoch": 1.9092071611253196, - "grad_norm": 2.792572128928557, - "learning_rate": 1.0790279875212861e-07, - "loss": 0.6659, - "step": 1493 - }, - { - "epoch": 1.9104859335038364, - "grad_norm": 2.909777172072571, - "learning_rate": 1.0488997575310967e-07, - "loss": 0.664, - "step": 1494 - }, - { - "epoch": 1.9117647058823528, - "grad_norm": 1.721324631800612, - "learning_rate": 1.0191959018222009e-07, - "loss": 0.3869, - "step": 1495 - }, - { - "epoch": 1.9130434782608696, - "grad_norm": 3.076075829292728, - "learning_rate": 9.899165477863293e-08, - "loss": 0.7088, - "step": 1496 - }, - { - "epoch": 1.9143222506393862, - "grad_norm": 1.5547122348722169, - "learning_rate": 9.610618209946466e-08, - "loss": 0.3811, - "step": 1497 - }, - { - "epoch": 1.9156010230179028, - "grad_norm": 2.7747758148320893, - "learning_rate": 9.326318451972071e-08, - "loss": 0.6416, - "step": 1498 - }, - { - "epoch": 1.9168797953964194, - "grad_norm": 2.9601984935258683, - "learning_rate": 9.046267423224232e-08, - "loss": 0.6933, - "step": 1499 - }, - { - "epoch": 1.918158567774936, - "grad_norm": 3.1131269667930535, - "learning_rate": 8.770466324765303e-08, - "loss": 0.6658, - "step": 1500 - }, - { - "epoch": 1.9194373401534528, - "grad_norm": 2.9843763521924966, - "learning_rate": 8.498916339431118e-08, - "loss": 0.7381, - "step": 1501 - }, - { - "epoch": 1.9207161125319692, - "grad_norm": 2.9551857789371945, - "learning_rate": 8.231618631825533e-08, - "loss": 0.7024, - "step": 1502 - }, - { - "epoch": 1.921994884910486, - "grad_norm": 2.9640633988461174, - "learning_rate": 7.968574348315439e-08, - "loss": 0.7229, - "step": 1503 - }, - { - "epoch": 1.9232736572890026, - "grad_norm": 3.2150341801435856, - "learning_rate": 7.709784617025983e-08, - "loss": 0.699, - "step": 1504 - }, - { - "epoch": 1.9245524296675192, - "grad_norm": 1.6659596056451151, - "learning_rate": 7.455250547835913e-08, - "loss": 0.4212, - "step": 1505 - }, - { - "epoch": 1.9258312020460358, - "grad_norm": 3.064476978891848, - "learning_rate": 7.204973232372126e-08, - "loss": 0.6297, - "step": 1506 - }, - { - "epoch": 1.9271099744245523, - "grad_norm": 2.881181122713157, - "learning_rate": 6.958953744006125e-08, - "loss": 0.7375, - "step": 1507 - }, - { - "epoch": 1.9283887468030692, - "grad_norm": 2.7740345216989715, - "learning_rate": 6.717193137848132e-08, - "loss": 0.6748, - "step": 1508 - }, - { - "epoch": 1.9296675191815855, - "grad_norm": 1.6230241716976352, - "learning_rate": 6.479692450743647e-08, - "loss": 0.3633, - "step": 1509 - }, - { - "epoch": 1.9309462915601023, - "grad_norm": 2.8632340911409044, - "learning_rate": 6.246452701268002e-08, - "loss": 0.6759, - "step": 1510 - }, - { - "epoch": 1.932225063938619, - "grad_norm": 2.6747043782932387, - "learning_rate": 6.017474889723374e-08, - "loss": 0.5888, - "step": 1511 - }, - { - "epoch": 1.9335038363171355, - "grad_norm": 2.997622659227604, - "learning_rate": 5.79275999813278e-08, - "loss": 0.6731, - "step": 1512 - }, - { - "epoch": 1.9347826086956523, - "grad_norm": 3.1030396452541567, - "learning_rate": 5.5723089902370854e-08, - "loss": 0.7181, - "step": 1513 - }, - { - "epoch": 1.9360613810741687, - "grad_norm": 1.6619423055150166, - "learning_rate": 5.356122811490783e-08, - "loss": 0.3881, - "step": 1514 - }, - { - "epoch": 1.9373401534526855, - "grad_norm": 2.773449328907613, - "learning_rate": 5.144202389057329e-08, - "loss": 0.6681, - "step": 1515 - }, - { - "epoch": 1.938618925831202, - "grad_norm": 3.0245150161276975, - "learning_rate": 4.9365486318054825e-08, - "loss": 0.7526, - "step": 1516 - }, - { - "epoch": 1.9398976982097187, - "grad_norm": 3.2613212625417383, - "learning_rate": 4.7331624303057485e-08, - "loss": 0.7433, - "step": 1517 - }, - { - "epoch": 1.9411764705882353, - "grad_norm": 3.0282878192497797, - "learning_rate": 4.534044656825942e-08, - "loss": 0.6983, - "step": 1518 - }, - { - "epoch": 1.9424552429667519, - "grad_norm": 3.0310054581999855, - "learning_rate": 4.339196165327963e-08, - "loss": 0.7415, - "step": 1519 - }, - { - "epoch": 1.9437340153452687, - "grad_norm": 2.8968605710553823, - "learning_rate": 4.148617791463805e-08, - "loss": 0.6689, - "step": 1520 - }, - { - "epoch": 1.945012787723785, - "grad_norm": 3.0046916984916097, - "learning_rate": 3.9623103525723294e-08, - "loss": 0.7428, - "step": 1521 - }, - { - "epoch": 1.9462915601023019, - "grad_norm": 1.729093728685049, - "learning_rate": 3.780274647674942e-08, - "loss": 0.3972, - "step": 1522 - }, - { - "epoch": 1.9475703324808185, - "grad_norm": 2.8361826941160606, - "learning_rate": 3.602511457473479e-08, - "loss": 0.6978, - "step": 1523 - }, - { - "epoch": 1.948849104859335, - "grad_norm": 3.247856519682909, - "learning_rate": 3.4290215443456566e-08, - "loss": 0.7389, - "step": 1524 - }, - { - "epoch": 1.9501278772378516, - "grad_norm": 1.8470508397112757, - "learning_rate": 3.259805652342407e-08, - "loss": 0.3828, - "step": 1525 - }, - { - "epoch": 1.9514066496163682, - "grad_norm": 2.949241335076512, - "learning_rate": 3.0948645071844366e-08, - "loss": 0.6489, - "step": 1526 - }, - { - "epoch": 1.952685421994885, - "grad_norm": 2.804687801652395, - "learning_rate": 2.9341988162595593e-08, - "loss": 0.7149, - "step": 1527 - }, - { - "epoch": 1.9539641943734014, - "grad_norm": 3.1195428320133702, - "learning_rate": 2.777809268618925e-08, - "loss": 0.7302, - "step": 1528 - }, - { - "epoch": 1.9552429667519182, - "grad_norm": 3.2195561135704978, - "learning_rate": 2.6256965349745754e-08, - "loss": 0.6926, - "step": 1529 - }, - { - "epoch": 1.9565217391304348, - "grad_norm": 1.5263460789476995, - "learning_rate": 2.4778612676967795e-08, - "loss": 0.3993, - "step": 1530 - }, - { - "epoch": 1.9578005115089514, - "grad_norm": 2.7917430742359355, - "learning_rate": 2.3343041008105916e-08, - "loss": 0.6904, - "step": 1531 - }, - { - "epoch": 1.959079283887468, - "grad_norm": 2.719639468415671, - "learning_rate": 2.1950256499934096e-08, - "loss": 0.7132, - "step": 1532 - }, - { - "epoch": 1.9603580562659846, - "grad_norm": 3.0923746371785366, - "learning_rate": 2.0600265125726438e-08, - "loss": 0.746, - "step": 1533 - }, - { - "epoch": 1.9616368286445014, - "grad_norm": 3.0068696519926252, - "learning_rate": 1.9293072675228284e-08, - "loss": 0.6595, - "step": 1534 - }, - { - "epoch": 1.9629156010230178, - "grad_norm": 1.818384136419238, - "learning_rate": 1.80286847546296e-08, - "loss": 0.4265, - "step": 1535 - }, - { - "epoch": 1.9641943734015346, - "grad_norm": 3.079575488052858, - "learning_rate": 1.680710678654718e-08, - "loss": 0.7432, - "step": 1536 - }, - { - "epoch": 1.9654731457800512, - "grad_norm": 2.806291514967577, - "learning_rate": 1.562834400999469e-08, - "loss": 0.6694, - "step": 1537 - }, - { - "epoch": 1.9667519181585678, - "grad_norm": 2.8419792407463302, - "learning_rate": 1.4492401480364903e-08, - "loss": 0.662, - "step": 1538 - }, - { - "epoch": 1.9680306905370843, - "grad_norm": 3.138705836552105, - "learning_rate": 1.3399284069405272e-08, - "loss": 0.6703, - "step": 1539 - }, - { - "epoch": 1.969309462915601, - "grad_norm": 2.9683361023152695, - "learning_rate": 1.2348996465199048e-08, - "loss": 0.6864, - "step": 1540 - }, - { - "epoch": 1.9705882352941178, - "grad_norm": 2.8622725700614513, - "learning_rate": 1.1341543172140868e-08, - "loss": 0.655, - "step": 1541 - }, - { - "epoch": 1.9718670076726341, - "grad_norm": 1.7801182742494335, - "learning_rate": 1.0376928510925643e-08, - "loss": 0.3766, - "step": 1542 - }, - { - "epoch": 1.973145780051151, - "grad_norm": 3.083840752824613, - "learning_rate": 9.455156618521922e-09, - "loss": 0.6812, - "step": 1543 - }, - { - "epoch": 1.9744245524296675, - "grad_norm": 2.8610104988143936, - "learning_rate": 8.576231448156336e-09, - "loss": 0.6483, - "step": 1544 - }, - { - "epoch": 1.9757033248081841, - "grad_norm": 2.7331898561952954, - "learning_rate": 7.740156769302509e-09, - "loss": 0.6733, - "step": 1545 - }, - { - "epoch": 1.976982097186701, - "grad_norm": 2.9383795524302716, - "learning_rate": 6.946936167653295e-09, - "loss": 0.6564, - "step": 1546 - }, - { - "epoch": 1.9782608695652173, - "grad_norm": 3.157052668229583, - "learning_rate": 6.1965730451174485e-09, - "loss": 0.7426, - "step": 1547 - }, - { - "epoch": 1.979539641943734, - "grad_norm": 2.9451011714738957, - "learning_rate": 5.489070619797421e-09, - "loss": 0.7381, - "step": 1548 - }, - { - "epoch": 1.9808184143222505, - "grad_norm": 2.9110811392905416, - "learning_rate": 4.824431925977147e-09, - "loss": 0.6609, - "step": 1549 - }, - { - "epoch": 1.9820971867007673, - "grad_norm": 1.6305125253771289, - "learning_rate": 4.202659814112053e-09, - "loss": 0.3901, - "step": 1550 - }, - { - "epoch": 1.9833759590792839, - "grad_norm": 3.0239560577936992, - "learning_rate": 3.6237569508135174e-09, - "loss": 0.7366, - "step": 1551 - }, - { - "epoch": 1.9846547314578005, - "grad_norm": 2.819136875260791, - "learning_rate": 3.087725818836651e-09, - "loss": 0.6678, - "step": 1552 - }, - { - "epoch": 1.9859335038363173, - "grad_norm": 1.7004521836030406, - "learning_rate": 2.594568717072532e-09, - "loss": 0.413, - "step": 1553 - }, - { - "epoch": 1.9872122762148337, - "grad_norm": 3.0497021883316475, - "learning_rate": 2.1442877605393207e-09, - "loss": 0.743, - "step": 1554 - }, - { - "epoch": 1.9884910485933505, - "grad_norm": 3.2115829821335873, - "learning_rate": 1.7368848803678285e-09, - "loss": 0.6991, - "step": 1555 - }, - { - "epoch": 1.989769820971867, - "grad_norm": 2.952743514018284, - "learning_rate": 1.3723618237981851e-09, - "loss": 0.7266, - "step": 1556 - }, - { - "epoch": 1.9910485933503836, - "grad_norm": 2.847923192368732, - "learning_rate": 1.0507201541698486e-09, - "loss": 0.7347, - "step": 1557 - }, - { - "epoch": 1.9923273657289002, - "grad_norm": 1.5422964439594649, - "learning_rate": 7.719612509182739e-10, - "loss": 0.364, - "step": 1558 - }, - { - "epoch": 1.9936061381074168, - "grad_norm": 2.8414552956991663, - "learning_rate": 5.360863095615898e-10, - "loss": 0.6956, - "step": 1559 - }, - { - "epoch": 1.9948849104859336, - "grad_norm": 2.990189429590757, - "learning_rate": 3.430963417050404e-10, - "loss": 0.6702, - "step": 1560 - }, - { - "epoch": 1.99616368286445, - "grad_norm": 3.053399076926869, - "learning_rate": 1.929921750287722e-10, - "loss": 0.697, - "step": 1561 - }, - { - "epoch": 1.9974424552429668, - "grad_norm": 3.02110706790344, - "learning_rate": 8.577445328894485e-11, - "loss": 0.7564, - "step": 1562 - }, - { - "epoch": 1.9987212276214834, - "grad_norm": 2.6675586079472455, - "learning_rate": 2.1443636313289718e-11, - "loss": 0.5788, - "step": 1563 - }, - { - "epoch": 2.0, - "grad_norm": 1.6765900038196582, - "learning_rate": 0.0, - "loss": 0.2796, - "step": 1564 - }, - { - "epoch": 2.0, - "step": 1564, - "total_flos": 259266046148608.0, - "train_loss": 0.9449686132695364, - "train_runtime": 5806.7241, - "train_samples_per_second": 17.22, - "train_steps_per_second": 0.269 - } - ], - "logging_steps": 1.0, - "max_steps": 1564, - "num_input_tokens_seen": 0, - "num_train_epochs": 2, - "save_steps": 50000, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": false, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 259266046148608.0, - "train_batch_size": 8, - "trial_name": null, - "trial_params": null -}