{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09684688389229731, "eval_steps": 500, "global_step": 1472, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.579272003552806e-05, "grad_norm": 0.5296143293380737, "learning_rate": 2e-05, "loss": 2.0928, "step": 1 }, { "epoch": 0.00013158544007105613, "grad_norm": 0.531254768371582, "learning_rate": 4e-05, "loss": 2.124, "step": 2 }, { "epoch": 0.00019737816010658422, "grad_norm": 0.5184187293052673, "learning_rate": 6e-05, "loss": 2.0397, "step": 3 }, { "epoch": 0.00026317088014211226, "grad_norm": 0.5384601950645447, "learning_rate": 8e-05, "loss": 2.1003, "step": 4 }, { "epoch": 0.0003289636001776403, "grad_norm": 0.6538830399513245, "learning_rate": 0.0001, "loss": 2.1672, "step": 5 }, { "epoch": 0.00039475632021316844, "grad_norm": 0.6739334464073181, "learning_rate": 9.999988534860049e-05, "loss": 2.0353, "step": 6 }, { "epoch": 0.0004605490402486965, "grad_norm": 0.7221142649650574, "learning_rate": 9.999954139492774e-05, "loss": 1.9623, "step": 7 }, { "epoch": 0.0005263417602842245, "grad_norm": 0.5722587704658508, "learning_rate": 9.999896814055916e-05, "loss": 1.9115, "step": 8 }, { "epoch": 0.0005921344803197526, "grad_norm": 0.5293303728103638, "learning_rate": 9.99981655881237e-05, "loss": 1.9862, "step": 9 }, { "epoch": 0.0006579272003552806, "grad_norm": 0.7691564559936523, "learning_rate": 9.999713374130194e-05, "loss": 1.7427, "step": 10 }, { "epoch": 0.0007237199203908088, "grad_norm": 0.8434930443763733, "learning_rate": 9.999587260482597e-05, "loss": 1.9118, "step": 11 }, { "epoch": 0.0007895126404263369, "grad_norm": 0.9281416535377502, "learning_rate": 9.999438218447944e-05, "loss": 2.0491, "step": 12 }, { "epoch": 0.0008553053604618649, "grad_norm": 0.7365149259567261, "learning_rate": 9.999266248709749e-05, "loss": 1.8001, "step": 13 }, { "epoch": 0.000921098080497393, "grad_norm": 0.7132240533828735, "learning_rate": 9.999071352056675e-05, "loss": 1.8975, "step": 14 }, { "epoch": 0.000986890800532921, "grad_norm": 0.644156813621521, "learning_rate": 9.99885352938253e-05, "loss": 1.8091, "step": 15 }, { "epoch": 0.001052683520568449, "grad_norm": 0.6430739164352417, "learning_rate": 9.99861278168626e-05, "loss": 1.7025, "step": 16 }, { "epoch": 0.0011184762406039773, "grad_norm": 0.7189786434173584, "learning_rate": 9.998349110071949e-05, "loss": 1.7063, "step": 17 }, { "epoch": 0.0011842689606395053, "grad_norm": 0.7468182444572449, "learning_rate": 9.998062515748809e-05, "loss": 1.7283, "step": 18 }, { "epoch": 0.0012500616806750333, "grad_norm": 0.8974145650863647, "learning_rate": 9.997753000031175e-05, "loss": 1.8559, "step": 19 }, { "epoch": 0.0013158544007105613, "grad_norm": 0.8581226468086243, "learning_rate": 9.99742056433851e-05, "loss": 1.8267, "step": 20 }, { "epoch": 0.0013816471207460895, "grad_norm": 0.8268604874610901, "learning_rate": 9.997065210195373e-05, "loss": 1.8561, "step": 21 }, { "epoch": 0.0014474398407816175, "grad_norm": 0.8176312446594238, "learning_rate": 9.996686939231447e-05, "loss": 1.9261, "step": 22 }, { "epoch": 0.0015132325608171455, "grad_norm": 0.7667636275291443, "learning_rate": 9.9962857531815e-05, "loss": 1.7929, "step": 23 }, { "epoch": 0.0015790252808526738, "grad_norm": 0.7837241291999817, "learning_rate": 9.995861653885393e-05, "loss": 1.7541, "step": 24 }, { "epoch": 0.0016448180008882018, "grad_norm": 0.8536595106124878, "learning_rate": 9.99541464328807e-05, "loss": 1.6584, 
"step": 25 }, { "epoch": 0.0017106107209237298, "grad_norm": 0.775619626045227, "learning_rate": 9.994944723439546e-05, "loss": 1.7213, "step": 26 }, { "epoch": 0.0017764034409592578, "grad_norm": 0.8294109106063843, "learning_rate": 9.994451896494901e-05, "loss": 1.7305, "step": 27 }, { "epoch": 0.001842196160994786, "grad_norm": 0.7727370262145996, "learning_rate": 9.993936164714266e-05, "loss": 1.7374, "step": 28 }, { "epoch": 0.001907988881030314, "grad_norm": 0.7547221183776855, "learning_rate": 9.993397530462818e-05, "loss": 1.7342, "step": 29 }, { "epoch": 0.001973781601065842, "grad_norm": 0.7097113132476807, "learning_rate": 9.99283599621076e-05, "loss": 1.7741, "step": 30 }, { "epoch": 0.00203957432110137, "grad_norm": 0.8320249319076538, "learning_rate": 9.992251564533322e-05, "loss": 1.8332, "step": 31 }, { "epoch": 0.002105367041136898, "grad_norm": 0.7366943359375, "learning_rate": 9.99164423811074e-05, "loss": 1.5746, "step": 32 }, { "epoch": 0.002171159761172426, "grad_norm": 0.7390754818916321, "learning_rate": 9.991014019728246e-05, "loss": 1.6085, "step": 33 }, { "epoch": 0.0022369524812079545, "grad_norm": 0.77167809009552, "learning_rate": 9.99036091227606e-05, "loss": 1.6211, "step": 34 }, { "epoch": 0.0023027452012434825, "grad_norm": 0.7611998319625854, "learning_rate": 9.989684918749365e-05, "loss": 1.7032, "step": 35 }, { "epoch": 0.0023685379212790105, "grad_norm": 0.7919670343399048, "learning_rate": 9.988986042248308e-05, "loss": 1.5274, "step": 36 }, { "epoch": 0.0024343306413145385, "grad_norm": 0.8527458906173706, "learning_rate": 9.988264285977974e-05, "loss": 1.5267, "step": 37 }, { "epoch": 0.0025001233613500666, "grad_norm": 0.8847199082374573, "learning_rate": 9.987519653248378e-05, "loss": 1.6056, "step": 38 }, { "epoch": 0.0025659160813855946, "grad_norm": 0.7511000633239746, "learning_rate": 9.986752147474449e-05, "loss": 1.5865, "step": 39 }, { "epoch": 0.0026317088014211226, "grad_norm": 0.8162057399749756, "learning_rate": 9.985961772176009e-05, "loss": 1.7024, "step": 40 }, { "epoch": 0.002697501521456651, "grad_norm": 0.937961757183075, "learning_rate": 9.985148530977767e-05, "loss": 1.6629, "step": 41 }, { "epoch": 0.002763294241492179, "grad_norm": 0.9329769015312195, "learning_rate": 9.984312427609287e-05, "loss": 1.7787, "step": 42 }, { "epoch": 0.002829086961527707, "grad_norm": 0.9981400370597839, "learning_rate": 9.983453465904992e-05, "loss": 1.6868, "step": 43 }, { "epoch": 0.002894879681563235, "grad_norm": 0.8342626690864563, "learning_rate": 9.982571649804126e-05, "loss": 1.6639, "step": 44 }, { "epoch": 0.002960672401598763, "grad_norm": 0.9021373391151428, "learning_rate": 9.981666983350746e-05, "loss": 1.6202, "step": 45 }, { "epoch": 0.003026465121634291, "grad_norm": 0.9746200442314148, "learning_rate": 9.980739470693704e-05, "loss": 1.5423, "step": 46 }, { "epoch": 0.003092257841669819, "grad_norm": 0.9255953431129456, "learning_rate": 9.979789116086625e-05, "loss": 1.686, "step": 47 }, { "epoch": 0.0031580505617053475, "grad_norm": 1.0396703481674194, "learning_rate": 9.978815923887887e-05, "loss": 1.6445, "step": 48 }, { "epoch": 0.0032238432817408755, "grad_norm": 1.0552101135253906, "learning_rate": 9.977819898560605e-05, "loss": 1.7063, "step": 49 }, { "epoch": 0.0032896360017764035, "grad_norm": 1.3620139360427856, "learning_rate": 9.976801044672608e-05, "loss": 1.4181, "step": 50 }, { "epoch": 0.0033554287218119315, "grad_norm": 0.8541191220283508, "learning_rate": 9.975759366896414e-05, "loss": 1.7838, "step": 51 }, { 
"epoch": 0.0034212214418474596, "grad_norm": 0.969934344291687, "learning_rate": 9.974694870009219e-05, "loss": 1.8725, "step": 52 }, { "epoch": 0.0034870141618829876, "grad_norm": 0.8264399170875549, "learning_rate": 9.973607558892864e-05, "loss": 1.7727, "step": 53 }, { "epoch": 0.0035528068819185156, "grad_norm": 0.7832493185997009, "learning_rate": 9.972497438533817e-05, "loss": 1.8766, "step": 54 }, { "epoch": 0.0036185996019540436, "grad_norm": 0.6866966485977173, "learning_rate": 9.971364514023155e-05, "loss": 1.7002, "step": 55 }, { "epoch": 0.003684392321989572, "grad_norm": 0.5976539850234985, "learning_rate": 9.970208790556532e-05, "loss": 1.7585, "step": 56 }, { "epoch": 0.0037501850420251, "grad_norm": 0.6232089400291443, "learning_rate": 9.969030273434159e-05, "loss": 1.8284, "step": 57 }, { "epoch": 0.003815977762060628, "grad_norm": 0.6442340612411499, "learning_rate": 9.967828968060783e-05, "loss": 1.7818, "step": 58 }, { "epoch": 0.003881770482096156, "grad_norm": 0.6159253120422363, "learning_rate": 9.966604879945659e-05, "loss": 1.6145, "step": 59 }, { "epoch": 0.003947563202131684, "grad_norm": 0.6624003648757935, "learning_rate": 9.965358014702519e-05, "loss": 1.7501, "step": 60 }, { "epoch": 0.004013355922167212, "grad_norm": 0.6175729632377625, "learning_rate": 9.964088378049562e-05, "loss": 1.7301, "step": 61 }, { "epoch": 0.00407914864220274, "grad_norm": 0.622405469417572, "learning_rate": 9.962795975809411e-05, "loss": 1.647, "step": 62 }, { "epoch": 0.004144941362238268, "grad_norm": 0.6554991602897644, "learning_rate": 9.961480813909094e-05, "loss": 1.8178, "step": 63 }, { "epoch": 0.004210734082273796, "grad_norm": 0.7180414199829102, "learning_rate": 9.960142898380018e-05, "loss": 1.9175, "step": 64 }, { "epoch": 0.004276526802309324, "grad_norm": 0.6984550356864929, "learning_rate": 9.958782235357938e-05, "loss": 1.8153, "step": 65 }, { "epoch": 0.004342319522344852, "grad_norm": 0.6315418481826782, "learning_rate": 9.957398831082931e-05, "loss": 1.6787, "step": 66 }, { "epoch": 0.004408112242380381, "grad_norm": 0.6879199743270874, "learning_rate": 9.955992691899367e-05, "loss": 1.61, "step": 67 }, { "epoch": 0.004473904962415909, "grad_norm": 0.6576867699623108, "learning_rate": 9.954563824255878e-05, "loss": 1.8015, "step": 68 }, { "epoch": 0.004539697682451437, "grad_norm": 0.656387209892273, "learning_rate": 9.953112234705333e-05, "loss": 1.8581, "step": 69 }, { "epoch": 0.004605490402486965, "grad_norm": 0.6445766091346741, "learning_rate": 9.9516379299048e-05, "loss": 1.7644, "step": 70 }, { "epoch": 0.004671283122522493, "grad_norm": 0.6688461303710938, "learning_rate": 9.950140916615526e-05, "loss": 1.7838, "step": 71 }, { "epoch": 0.004737075842558021, "grad_norm": 0.7055814862251282, "learning_rate": 9.948621201702896e-05, "loss": 1.6804, "step": 72 }, { "epoch": 0.004802868562593549, "grad_norm": 0.7650384306907654, "learning_rate": 9.947078792136408e-05, "loss": 1.7436, "step": 73 }, { "epoch": 0.004868661282629077, "grad_norm": 0.7580650448799133, "learning_rate": 9.945513694989639e-05, "loss": 1.6489, "step": 74 }, { "epoch": 0.004934454002664605, "grad_norm": 0.7199274301528931, "learning_rate": 9.943925917440214e-05, "loss": 1.7196, "step": 75 }, { "epoch": 0.005000246722700133, "grad_norm": 0.7812178134918213, "learning_rate": 9.942315466769765e-05, "loss": 1.6552, "step": 76 }, { "epoch": 0.005066039442735661, "grad_norm": 0.7464516758918762, "learning_rate": 9.940682350363912e-05, "loss": 1.7772, "step": 77 }, { "epoch": 
0.005131832162771189, "grad_norm": 0.7458325028419495, "learning_rate": 9.939026575712218e-05, "loss": 1.6198, "step": 78 }, { "epoch": 0.005197624882806717, "grad_norm": 0.7330995202064514, "learning_rate": 9.937348150408159e-05, "loss": 1.7143, "step": 79 }, { "epoch": 0.005263417602842245, "grad_norm": 0.7014390826225281, "learning_rate": 9.935647082149086e-05, "loss": 1.5854, "step": 80 }, { "epoch": 0.005329210322877773, "grad_norm": 0.7711056470870972, "learning_rate": 9.933923378736194e-05, "loss": 1.8168, "step": 81 }, { "epoch": 0.005395003042913302, "grad_norm": 0.7262523174285889, "learning_rate": 9.932177048074483e-05, "loss": 1.7339, "step": 82 }, { "epoch": 0.00546079576294883, "grad_norm": 0.7787086367607117, "learning_rate": 9.930408098172725e-05, "loss": 1.6516, "step": 83 }, { "epoch": 0.005526588482984358, "grad_norm": 0.6976877450942993, "learning_rate": 9.928616537143421e-05, "loss": 1.6226, "step": 84 }, { "epoch": 0.005592381203019886, "grad_norm": 0.7678042650222778, "learning_rate": 9.926802373202768e-05, "loss": 1.6561, "step": 85 }, { "epoch": 0.005658173923055414, "grad_norm": 0.766779363155365, "learning_rate": 9.924965614670629e-05, "loss": 1.6439, "step": 86 }, { "epoch": 0.005723966643090942, "grad_norm": 0.7825470566749573, "learning_rate": 9.923106269970478e-05, "loss": 1.6673, "step": 87 }, { "epoch": 0.00578975936312647, "grad_norm": 0.8223551511764526, "learning_rate": 9.921224347629374e-05, "loss": 1.548, "step": 88 }, { "epoch": 0.005855552083161998, "grad_norm": 0.8139538168907166, "learning_rate": 9.91931985627792e-05, "loss": 1.5381, "step": 89 }, { "epoch": 0.005921344803197526, "grad_norm": 0.7926769256591797, "learning_rate": 9.917392804650218e-05, "loss": 1.6728, "step": 90 }, { "epoch": 0.005987137523233054, "grad_norm": 0.9060941934585571, "learning_rate": 9.915443201583834e-05, "loss": 1.6211, "step": 91 }, { "epoch": 0.006052930243268582, "grad_norm": 0.8837563991546631, "learning_rate": 9.91347105601976e-05, "loss": 1.5703, "step": 92 }, { "epoch": 0.00611872296330411, "grad_norm": 0.8119835257530212, "learning_rate": 9.911476377002364e-05, "loss": 1.5577, "step": 93 }, { "epoch": 0.006184515683339638, "grad_norm": 0.8880691528320312, "learning_rate": 9.909459173679354e-05, "loss": 1.7592, "step": 94 }, { "epoch": 0.006250308403375166, "grad_norm": 0.8486512303352356, "learning_rate": 9.907419455301741e-05, "loss": 1.6063, "step": 95 }, { "epoch": 0.006316101123410695, "grad_norm": 0.9571446180343628, "learning_rate": 9.905357231223785e-05, "loss": 1.711, "step": 96 }, { "epoch": 0.006381893843446223, "grad_norm": 0.9212421178817749, "learning_rate": 9.90327251090296e-05, "loss": 1.4925, "step": 97 }, { "epoch": 0.006447686563481751, "grad_norm": 0.9694377183914185, "learning_rate": 9.901165303899916e-05, "loss": 1.7433, "step": 98 }, { "epoch": 0.006513479283517279, "grad_norm": 1.1317200660705566, "learning_rate": 9.899035619878414e-05, "loss": 1.6807, "step": 99 }, { "epoch": 0.006579272003552807, "grad_norm": 1.1567927598953247, "learning_rate": 9.896883468605311e-05, "loss": 1.2842, "step": 100 }, { "epoch": 0.006645064723588335, "grad_norm": 0.6888508796691895, "learning_rate": 9.89470885995049e-05, "loss": 1.8472, "step": 101 }, { "epoch": 0.006710857443623863, "grad_norm": 0.6463817954063416, "learning_rate": 9.892511803886828e-05, "loss": 1.8032, "step": 102 }, { "epoch": 0.006776650163659391, "grad_norm": 0.6593803763389587, "learning_rate": 9.890292310490147e-05, "loss": 1.8945, "step": 103 }, { "epoch": 0.006842442883694919, 
"grad_norm": 0.6805739402770996, "learning_rate": 9.888050389939172e-05, "loss": 1.6301, "step": 104 }, { "epoch": 0.006908235603730447, "grad_norm": 0.6324853301048279, "learning_rate": 9.88578605251547e-05, "loss": 1.9058, "step": 105 }, { "epoch": 0.006974028323765975, "grad_norm": 0.5630888342857361, "learning_rate": 9.883499308603422e-05, "loss": 1.7052, "step": 106 }, { "epoch": 0.007039821043801503, "grad_norm": 0.5972461104393005, "learning_rate": 9.881190168690164e-05, "loss": 1.7202, "step": 107 }, { "epoch": 0.007105613763837031, "grad_norm": 0.6968611478805542, "learning_rate": 9.878858643365541e-05, "loss": 1.829, "step": 108 }, { "epoch": 0.007171406483872559, "grad_norm": 0.5739532113075256, "learning_rate": 9.876504743322057e-05, "loss": 1.7746, "step": 109 }, { "epoch": 0.007237199203908087, "grad_norm": 0.6316085457801819, "learning_rate": 9.874128479354832e-05, "loss": 1.7302, "step": 110 }, { "epoch": 0.007302991923943616, "grad_norm": 0.5930875539779663, "learning_rate": 9.871729862361543e-05, "loss": 1.625, "step": 111 }, { "epoch": 0.007368784643979144, "grad_norm": 0.6493741869926453, "learning_rate": 9.869308903342383e-05, "loss": 1.5649, "step": 112 }, { "epoch": 0.007434577364014672, "grad_norm": 0.6738541722297668, "learning_rate": 9.866865613400008e-05, "loss": 1.8187, "step": 113 }, { "epoch": 0.0075003700840502, "grad_norm": 0.6308318972587585, "learning_rate": 9.864400003739476e-05, "loss": 1.6503, "step": 114 }, { "epoch": 0.007566162804085728, "grad_norm": 0.6296387910842896, "learning_rate": 9.861912085668216e-05, "loss": 1.6963, "step": 115 }, { "epoch": 0.007631955524121256, "grad_norm": 0.6289705634117126, "learning_rate": 9.859401870595959e-05, "loss": 1.4958, "step": 116 }, { "epoch": 0.007697748244156784, "grad_norm": 0.7387570142745972, "learning_rate": 9.856869370034692e-05, "loss": 1.7227, "step": 117 }, { "epoch": 0.007763540964192312, "grad_norm": 0.6664671301841736, "learning_rate": 9.854314595598602e-05, "loss": 1.7069, "step": 118 }, { "epoch": 0.00782933368422784, "grad_norm": 0.6984542608261108, "learning_rate": 9.85173755900403e-05, "loss": 1.8907, "step": 119 }, { "epoch": 0.007895126404263368, "grad_norm": 0.6584218144416809, "learning_rate": 9.84913827206941e-05, "loss": 1.5777, "step": 120 }, { "epoch": 0.007960919124298896, "grad_norm": 0.6700417399406433, "learning_rate": 9.846516746715217e-05, "loss": 1.5087, "step": 121 }, { "epoch": 0.008026711844334424, "grad_norm": 0.7032206654548645, "learning_rate": 9.843872994963911e-05, "loss": 1.591, "step": 122 }, { "epoch": 0.008092504564369952, "grad_norm": 0.6893160939216614, "learning_rate": 9.841207028939889e-05, "loss": 1.5799, "step": 123 }, { "epoch": 0.00815829728440548, "grad_norm": 0.7209369540214539, "learning_rate": 9.838518860869416e-05, "loss": 1.5601, "step": 124 }, { "epoch": 0.008224090004441008, "grad_norm": 0.7673851251602173, "learning_rate": 9.835808503080585e-05, "loss": 1.5634, "step": 125 }, { "epoch": 0.008289882724476536, "grad_norm": 0.810266375541687, "learning_rate": 9.833075968003249e-05, "loss": 1.8813, "step": 126 }, { "epoch": 0.008355675444512064, "grad_norm": 0.6878501176834106, "learning_rate": 9.830321268168965e-05, "loss": 1.5009, "step": 127 }, { "epoch": 0.008421468164547592, "grad_norm": 0.7561343908309937, "learning_rate": 9.827544416210941e-05, "loss": 1.7062, "step": 128 }, { "epoch": 0.00848726088458312, "grad_norm": 0.6917654275894165, "learning_rate": 9.824745424863973e-05, "loss": 1.5736, "step": 129 }, { "epoch": 0.008553053604618648, 
"grad_norm": 0.826567530632019, "learning_rate": 9.821924306964398e-05, "loss": 1.7886, "step": 130 }, { "epoch": 0.008618846324654176, "grad_norm": 0.7179293036460876, "learning_rate": 9.819081075450014e-05, "loss": 1.6986, "step": 131 }, { "epoch": 0.008684639044689704, "grad_norm": 0.7051801085472107, "learning_rate": 9.816215743360044e-05, "loss": 1.4127, "step": 132 }, { "epoch": 0.008750431764725234, "grad_norm": 0.7013904452323914, "learning_rate": 9.813328323835061e-05, "loss": 1.7484, "step": 133 }, { "epoch": 0.008816224484760762, "grad_norm": 0.7020507454872131, "learning_rate": 9.810418830116932e-05, "loss": 1.7741, "step": 134 }, { "epoch": 0.00888201720479629, "grad_norm": 0.7591568231582642, "learning_rate": 9.807487275548757e-05, "loss": 1.5618, "step": 135 }, { "epoch": 0.008947809924831818, "grad_norm": 0.739377498626709, "learning_rate": 9.804533673574812e-05, "loss": 1.4941, "step": 136 }, { "epoch": 0.009013602644867346, "grad_norm": 0.7584704756736755, "learning_rate": 9.801558037740478e-05, "loss": 1.7772, "step": 137 }, { "epoch": 0.009079395364902874, "grad_norm": 0.7530946135520935, "learning_rate": 9.79856038169219e-05, "loss": 1.5457, "step": 138 }, { "epoch": 0.009145188084938402, "grad_norm": 0.7504359483718872, "learning_rate": 9.795540719177365e-05, "loss": 1.6163, "step": 139 }, { "epoch": 0.00921098080497393, "grad_norm": 0.8109403252601624, "learning_rate": 9.792499064044342e-05, "loss": 1.7455, "step": 140 }, { "epoch": 0.009276773525009458, "grad_norm": 0.8034817576408386, "learning_rate": 9.789435430242329e-05, "loss": 1.5284, "step": 141 }, { "epoch": 0.009342566245044986, "grad_norm": 0.8187747597694397, "learning_rate": 9.786349831821314e-05, "loss": 1.6745, "step": 142 }, { "epoch": 0.009408358965080514, "grad_norm": 0.7957119345664978, "learning_rate": 9.783242282932028e-05, "loss": 1.5745, "step": 143 }, { "epoch": 0.009474151685116042, "grad_norm": 0.8206056952476501, "learning_rate": 9.780112797825865e-05, "loss": 1.6354, "step": 144 }, { "epoch": 0.00953994440515157, "grad_norm": 0.8320123553276062, "learning_rate": 9.776961390854818e-05, "loss": 1.5993, "step": 145 }, { "epoch": 0.009605737125187098, "grad_norm": 0.8511732220649719, "learning_rate": 9.773788076471414e-05, "loss": 1.5566, "step": 146 }, { "epoch": 0.009671529845222626, "grad_norm": 0.8763437867164612, "learning_rate": 9.770592869228653e-05, "loss": 1.7509, "step": 147 }, { "epoch": 0.009737322565258154, "grad_norm": 0.9537453651428223, "learning_rate": 9.767375783779932e-05, "loss": 1.58, "step": 148 }, { "epoch": 0.009803115285293682, "grad_norm": 1.1339179277420044, "learning_rate": 9.764136834878986e-05, "loss": 1.5152, "step": 149 }, { "epoch": 0.00986890800532921, "grad_norm": 1.4353673458099365, "learning_rate": 9.760876037379816e-05, "loss": 1.4473, "step": 150 }, { "epoch": 0.009934700725364738, "grad_norm": 0.62269127368927, "learning_rate": 9.757593406236623e-05, "loss": 1.6958, "step": 151 }, { "epoch": 0.010000493445400266, "grad_norm": 0.7061877250671387, "learning_rate": 9.754288956503736e-05, "loss": 1.7587, "step": 152 }, { "epoch": 0.010066286165435794, "grad_norm": 0.680920422077179, "learning_rate": 9.750962703335547e-05, "loss": 1.81, "step": 153 }, { "epoch": 0.010132078885471322, "grad_norm": 0.6718249917030334, "learning_rate": 9.747614661986437e-05, "loss": 1.8675, "step": 154 }, { "epoch": 0.01019787160550685, "grad_norm": 0.6467337608337402, "learning_rate": 9.744244847810716e-05, "loss": 1.7655, "step": 155 }, { "epoch": 0.010263664325542378, 
"grad_norm": 0.6753641963005066, "learning_rate": 9.740853276262534e-05, "loss": 1.6277, "step": 156 }, { "epoch": 0.010329457045577906, "grad_norm": 0.5853750705718994, "learning_rate": 9.737439962895832e-05, "loss": 1.7043, "step": 157 }, { "epoch": 0.010395249765613434, "grad_norm": 0.6154720187187195, "learning_rate": 9.734004923364257e-05, "loss": 1.6254, "step": 158 }, { "epoch": 0.010461042485648962, "grad_norm": 0.6213800311088562, "learning_rate": 9.73054817342109e-05, "loss": 1.7266, "step": 159 }, { "epoch": 0.01052683520568449, "grad_norm": 0.6310060620307922, "learning_rate": 9.727069728919181e-05, "loss": 1.5622, "step": 160 }, { "epoch": 0.010592627925720018, "grad_norm": 0.6443656086921692, "learning_rate": 9.723569605810871e-05, "loss": 1.7595, "step": 161 }, { "epoch": 0.010658420645755546, "grad_norm": 0.6360974907875061, "learning_rate": 9.720047820147919e-05, "loss": 1.5975, "step": 162 }, { "epoch": 0.010724213365791076, "grad_norm": 0.648723840713501, "learning_rate": 9.716504388081436e-05, "loss": 1.6944, "step": 163 }, { "epoch": 0.010790006085826604, "grad_norm": 0.632750928401947, "learning_rate": 9.712939325861794e-05, "loss": 1.7593, "step": 164 }, { "epoch": 0.010855798805862132, "grad_norm": 0.6332002282142639, "learning_rate": 9.709352649838573e-05, "loss": 1.6756, "step": 165 }, { "epoch": 0.01092159152589766, "grad_norm": 0.6480251550674438, "learning_rate": 9.705744376460464e-05, "loss": 1.7099, "step": 166 }, { "epoch": 0.010987384245933188, "grad_norm": 0.6388484239578247, "learning_rate": 9.702114522275216e-05, "loss": 1.7719, "step": 167 }, { "epoch": 0.011053176965968716, "grad_norm": 0.5917187333106995, "learning_rate": 9.698463103929542e-05, "loss": 1.5279, "step": 168 }, { "epoch": 0.011118969686004244, "grad_norm": 0.6601297855377197, "learning_rate": 9.694790138169051e-05, "loss": 1.86, "step": 169 }, { "epoch": 0.011184762406039772, "grad_norm": 0.6304008364677429, "learning_rate": 9.691095641838169e-05, "loss": 1.6616, "step": 170 }, { "epoch": 0.0112505551260753, "grad_norm": 0.7166048288345337, "learning_rate": 9.687379631880062e-05, "loss": 1.6271, "step": 171 }, { "epoch": 0.011316347846110828, "grad_norm": 0.6489748954772949, "learning_rate": 9.683642125336562e-05, "loss": 1.6032, "step": 172 }, { "epoch": 0.011382140566146356, "grad_norm": 0.5979589223861694, "learning_rate": 9.679883139348082e-05, "loss": 1.6637, "step": 173 }, { "epoch": 0.011447933286181884, "grad_norm": 0.6470814347267151, "learning_rate": 9.676102691153542e-05, "loss": 1.5793, "step": 174 }, { "epoch": 0.011513726006217412, "grad_norm": 0.6679474711418152, "learning_rate": 9.67230079809029e-05, "loss": 1.5953, "step": 175 }, { "epoch": 0.01157951872625294, "grad_norm": 0.6367717385292053, "learning_rate": 9.66847747759402e-05, "loss": 1.5554, "step": 176 }, { "epoch": 0.011645311446288468, "grad_norm": 0.6821426749229431, "learning_rate": 9.664632747198693e-05, "loss": 1.6466, "step": 177 }, { "epoch": 0.011711104166323996, "grad_norm": 0.6893940567970276, "learning_rate": 9.660766624536459e-05, "loss": 1.6491, "step": 178 }, { "epoch": 0.011776896886359524, "grad_norm": 0.6703565716743469, "learning_rate": 9.656879127337571e-05, "loss": 1.664, "step": 179 }, { "epoch": 0.011842689606395052, "grad_norm": 0.739335298538208, "learning_rate": 9.652970273430312e-05, "loss": 1.6553, "step": 180 }, { "epoch": 0.01190848232643058, "grad_norm": 0.7087481617927551, "learning_rate": 9.649040080740902e-05, "loss": 1.6274, "step": 181 }, { "epoch": 0.011974275046466108, 
"grad_norm": 0.7415068745613098, "learning_rate": 9.645088567293426e-05, "loss": 1.6109, "step": 182 }, { "epoch": 0.012040067766501636, "grad_norm": 0.7282552123069763, "learning_rate": 9.641115751209746e-05, "loss": 1.5478, "step": 183 }, { "epoch": 0.012105860486537164, "grad_norm": 0.7318709492683411, "learning_rate": 9.637121650709417e-05, "loss": 1.7496, "step": 184 }, { "epoch": 0.012171653206572692, "grad_norm": 0.7077505588531494, "learning_rate": 9.63310628410961e-05, "loss": 1.697, "step": 185 }, { "epoch": 0.01223744592660822, "grad_norm": 0.7727397680282593, "learning_rate": 9.629069669825021e-05, "loss": 1.6696, "step": 186 }, { "epoch": 0.012303238646643748, "grad_norm": 0.7243698239326477, "learning_rate": 9.625011826367787e-05, "loss": 1.7203, "step": 187 }, { "epoch": 0.012369031366679276, "grad_norm": 0.754132091999054, "learning_rate": 9.620932772347408e-05, "loss": 1.6186, "step": 188 }, { "epoch": 0.012434824086714804, "grad_norm": 0.744714617729187, "learning_rate": 9.61683252647065e-05, "loss": 1.6537, "step": 189 }, { "epoch": 0.012500616806750332, "grad_norm": 0.8108708262443542, "learning_rate": 9.612711107541474e-05, "loss": 1.6096, "step": 190 }, { "epoch": 0.01256640952678586, "grad_norm": 0.7732747793197632, "learning_rate": 9.608568534460936e-05, "loss": 1.5707, "step": 191 }, { "epoch": 0.01263220224682139, "grad_norm": 0.8782806396484375, "learning_rate": 9.60440482622711e-05, "loss": 1.6991, "step": 192 }, { "epoch": 0.012697994966856918, "grad_norm": 0.8542294502258301, "learning_rate": 9.600220001934993e-05, "loss": 1.4367, "step": 193 }, { "epoch": 0.012763787686892446, "grad_norm": 0.877135694026947, "learning_rate": 9.596014080776423e-05, "loss": 1.6514, "step": 194 }, { "epoch": 0.012829580406927974, "grad_norm": 0.9459388256072998, "learning_rate": 9.591787082039992e-05, "loss": 1.6164, "step": 195 }, { "epoch": 0.012895373126963502, "grad_norm": 0.8651464581489563, "learning_rate": 9.587539025110952e-05, "loss": 1.6191, "step": 196 }, { "epoch": 0.01296116584699903, "grad_norm": 0.8998655080795288, "learning_rate": 9.583269929471128e-05, "loss": 1.4867, "step": 197 }, { "epoch": 0.013026958567034558, "grad_norm": 0.9345824122428894, "learning_rate": 9.578979814698835e-05, "loss": 1.6862, "step": 198 }, { "epoch": 0.013092751287070086, "grad_norm": 1.0395649671554565, "learning_rate": 9.574668700468777e-05, "loss": 1.4863, "step": 199 }, { "epoch": 0.013158544007105614, "grad_norm": 1.145094871520996, "learning_rate": 9.570336606551967e-05, "loss": 1.534, "step": 200 }, { "epoch": 0.013224336727141142, "grad_norm": 0.6736162304878235, "learning_rate": 9.565983552815628e-05, "loss": 1.7361, "step": 201 }, { "epoch": 0.01329012944717667, "grad_norm": 0.7287967205047607, "learning_rate": 9.56160955922311e-05, "loss": 1.9231, "step": 202 }, { "epoch": 0.013355922167212198, "grad_norm": 0.6452571153640747, "learning_rate": 9.557214645833792e-05, "loss": 1.7251, "step": 203 }, { "epoch": 0.013421714887247726, "grad_norm": 0.6660079956054688, "learning_rate": 9.552798832802993e-05, "loss": 1.7163, "step": 204 }, { "epoch": 0.013487507607283254, "grad_norm": 0.6526784896850586, "learning_rate": 9.548362140381876e-05, "loss": 1.6819, "step": 205 }, { "epoch": 0.013553300327318782, "grad_norm": 0.6540437340736389, "learning_rate": 9.543904588917367e-05, "loss": 1.7811, "step": 206 }, { "epoch": 0.01361909304735431, "grad_norm": 0.5529007911682129, "learning_rate": 9.53942619885204e-05, "loss": 1.6977, "step": 207 }, { "epoch": 0.013684885767389838, 
"grad_norm": 0.608326256275177, "learning_rate": 9.534926990724046e-05, "loss": 1.592, "step": 208 }, { "epoch": 0.013750678487425366, "grad_norm": 0.6321333050727844, "learning_rate": 9.530406985167004e-05, "loss": 1.7665, "step": 209 }, { "epoch": 0.013816471207460894, "grad_norm": 0.592158317565918, "learning_rate": 9.525866202909915e-05, "loss": 1.722, "step": 210 }, { "epoch": 0.013882263927496422, "grad_norm": 0.5666524767875671, "learning_rate": 9.521304664777058e-05, "loss": 1.652, "step": 211 }, { "epoch": 0.01394805664753195, "grad_norm": 0.6615280508995056, "learning_rate": 9.516722391687902e-05, "loss": 1.8336, "step": 212 }, { "epoch": 0.014013849367567478, "grad_norm": 0.6410874128341675, "learning_rate": 9.51211940465701e-05, "loss": 1.8056, "step": 213 }, { "epoch": 0.014079642087603006, "grad_norm": 0.6361430287361145, "learning_rate": 9.507495724793937e-05, "loss": 1.8389, "step": 214 }, { "epoch": 0.014145434807638534, "grad_norm": 0.6516650915145874, "learning_rate": 9.502851373303136e-05, "loss": 1.7175, "step": 215 }, { "epoch": 0.014211227527674062, "grad_norm": 2.113678455352783, "learning_rate": 9.498186371483868e-05, "loss": 1.6789, "step": 216 }, { "epoch": 0.01427702024770959, "grad_norm": 0.6748088598251343, "learning_rate": 9.493500740730089e-05, "loss": 1.6288, "step": 217 }, { "epoch": 0.014342812967745118, "grad_norm": 0.6387640833854675, "learning_rate": 9.488794502530362e-05, "loss": 1.6993, "step": 218 }, { "epoch": 0.014408605687780646, "grad_norm": 0.7229827046394348, "learning_rate": 9.484067678467761e-05, "loss": 1.5233, "step": 219 }, { "epoch": 0.014474398407816174, "grad_norm": 0.6891183257102966, "learning_rate": 9.479320290219766e-05, "loss": 1.7135, "step": 220 }, { "epoch": 0.014540191127851702, "grad_norm": 0.6379052400588989, "learning_rate": 9.474552359558166e-05, "loss": 1.5541, "step": 221 }, { "epoch": 0.014605983847887232, "grad_norm": 0.7389596104621887, "learning_rate": 9.469763908348957e-05, "loss": 1.7669, "step": 222 }, { "epoch": 0.01467177656792276, "grad_norm": 0.6853717565536499, "learning_rate": 9.464954958552244e-05, "loss": 1.6764, "step": 223 }, { "epoch": 0.014737569287958288, "grad_norm": 0.6811705231666565, "learning_rate": 9.460125532222141e-05, "loss": 1.6761, "step": 224 }, { "epoch": 0.014803362007993816, "grad_norm": 0.7454027533531189, "learning_rate": 9.455275651506665e-05, "loss": 1.657, "step": 225 }, { "epoch": 0.014869154728029344, "grad_norm": 0.6707807779312134, "learning_rate": 9.450405338647645e-05, "loss": 1.6695, "step": 226 }, { "epoch": 0.014934947448064872, "grad_norm": 0.698140025138855, "learning_rate": 9.445514615980604e-05, "loss": 1.658, "step": 227 }, { "epoch": 0.0150007401681004, "grad_norm": 0.6676850318908691, "learning_rate": 9.44060350593467e-05, "loss": 1.5562, "step": 228 }, { "epoch": 0.015066532888135928, "grad_norm": 0.7539533972740173, "learning_rate": 9.435672031032474e-05, "loss": 1.604, "step": 229 }, { "epoch": 0.015132325608171456, "grad_norm": 0.7509004473686218, "learning_rate": 9.43072021389003e-05, "loss": 1.5699, "step": 230 }, { "epoch": 0.015198118328206984, "grad_norm": 0.7379850149154663, "learning_rate": 9.425748077216649e-05, "loss": 1.6537, "step": 231 }, { "epoch": 0.015263911048242512, "grad_norm": 0.7617546319961548, "learning_rate": 9.420755643814833e-05, "loss": 1.7029, "step": 232 }, { "epoch": 0.01532970376827804, "grad_norm": 0.7647762894630432, "learning_rate": 9.415742936580157e-05, "loss": 1.6908, "step": 233 }, { "epoch": 0.015395496488313568, 
"grad_norm": 0.7109963297843933, "learning_rate": 9.410709978501177e-05, "loss": 1.6588, "step": 234 }, { "epoch": 0.015461289208349096, "grad_norm": 0.793974757194519, "learning_rate": 9.405656792659321e-05, "loss": 1.7287, "step": 235 }, { "epoch": 0.015527081928384624, "grad_norm": 0.7597332000732422, "learning_rate": 9.400583402228784e-05, "loss": 1.721, "step": 236 }, { "epoch": 0.015592874648420152, "grad_norm": 0.8103028535842896, "learning_rate": 9.395489830476417e-05, "loss": 1.5775, "step": 237 }, { "epoch": 0.01565866736845568, "grad_norm": 0.738635241985321, "learning_rate": 9.390376100761624e-05, "loss": 1.4594, "step": 238 }, { "epoch": 0.01572446008849121, "grad_norm": 0.7879254817962646, "learning_rate": 9.38524223653626e-05, "loss": 1.7778, "step": 239 }, { "epoch": 0.015790252808526736, "grad_norm": 0.7641370892524719, "learning_rate": 9.380088261344509e-05, "loss": 1.6935, "step": 240 }, { "epoch": 0.015856045528562266, "grad_norm": 0.8217648863792419, "learning_rate": 9.374914198822791e-05, "loss": 1.6218, "step": 241 }, { "epoch": 0.015921838248597792, "grad_norm": 0.799241840839386, "learning_rate": 9.369720072699647e-05, "loss": 1.6173, "step": 242 }, { "epoch": 0.015987630968633322, "grad_norm": 0.7930746674537659, "learning_rate": 9.364505906795631e-05, "loss": 1.6816, "step": 243 }, { "epoch": 0.01605342368866885, "grad_norm": 0.9224853515625, "learning_rate": 9.359271725023198e-05, "loss": 1.8139, "step": 244 }, { "epoch": 0.016119216408704378, "grad_norm": 0.8082649111747742, "learning_rate": 9.354017551386599e-05, "loss": 1.5355, "step": 245 }, { "epoch": 0.016185009128739904, "grad_norm": 0.9044872522354126, "learning_rate": 9.34874340998177e-05, "loss": 1.5935, "step": 246 }, { "epoch": 0.016250801848775434, "grad_norm": 0.8898908495903015, "learning_rate": 9.343449324996217e-05, "loss": 1.6187, "step": 247 }, { "epoch": 0.01631659456881096, "grad_norm": 0.8817726969718933, "learning_rate": 9.338135320708911e-05, "loss": 1.6481, "step": 248 }, { "epoch": 0.01638238728884649, "grad_norm": 1.0543079376220703, "learning_rate": 9.332801421490174e-05, "loss": 1.6283, "step": 249 }, { "epoch": 0.016448180008882016, "grad_norm": 1.1242119073867798, "learning_rate": 9.327447651801564e-05, "loss": 1.4045, "step": 250 }, { "epoch": 0.016513972728917546, "grad_norm": 0.6645945310592651, "learning_rate": 9.322074036195769e-05, "loss": 1.7638, "step": 251 }, { "epoch": 0.016579765448953072, "grad_norm": 0.636358380317688, "learning_rate": 9.316680599316493e-05, "loss": 1.8369, "step": 252 }, { "epoch": 0.016645558168988602, "grad_norm": 0.6823999881744385, "learning_rate": 9.311267365898337e-05, "loss": 1.7382, "step": 253 }, { "epoch": 0.01671135088902413, "grad_norm": 0.6345416903495789, "learning_rate": 9.305834360766695e-05, "loss": 1.6073, "step": 254 }, { "epoch": 0.016777143609059658, "grad_norm": 0.6101424694061279, "learning_rate": 9.300381608837631e-05, "loss": 1.705, "step": 255 }, { "epoch": 0.016842936329095184, "grad_norm": 0.6029302477836609, "learning_rate": 9.294909135117771e-05, "loss": 1.8328, "step": 256 }, { "epoch": 0.016908729049130714, "grad_norm": 0.5847995281219482, "learning_rate": 9.289416964704185e-05, "loss": 1.6327, "step": 257 }, { "epoch": 0.01697452176916624, "grad_norm": 0.6195299625396729, "learning_rate": 9.283905122784277e-05, "loss": 1.7127, "step": 258 }, { "epoch": 0.01704031448920177, "grad_norm": 0.5608875751495361, "learning_rate": 9.278373634635659e-05, "loss": 1.5031, "step": 259 }, { "epoch": 0.017106107209237296, 
"grad_norm": 0.5664215683937073, "learning_rate": 9.272822525626046e-05, "loss": 1.6431, "step": 260 }, { "epoch": 0.017171899929272826, "grad_norm": 0.5837283134460449, "learning_rate": 9.267251821213137e-05, "loss": 1.6068, "step": 261 }, { "epoch": 0.017237692649308353, "grad_norm": 0.6226207613945007, "learning_rate": 9.261661546944491e-05, "loss": 1.6153, "step": 262 }, { "epoch": 0.017303485369343882, "grad_norm": 0.6285345554351807, "learning_rate": 9.25605172845742e-05, "loss": 1.6145, "step": 263 }, { "epoch": 0.01736927808937941, "grad_norm": 0.6322391629219055, "learning_rate": 9.250422391478868e-05, "loss": 1.7426, "step": 264 }, { "epoch": 0.01743507080941494, "grad_norm": 0.599488377571106, "learning_rate": 9.244773561825287e-05, "loss": 1.5602, "step": 265 }, { "epoch": 0.017500863529450468, "grad_norm": 0.6227719187736511, "learning_rate": 9.239105265402525e-05, "loss": 1.5007, "step": 266 }, { "epoch": 0.017566656249485994, "grad_norm": 0.6282662153244019, "learning_rate": 9.23341752820571e-05, "loss": 1.6923, "step": 267 }, { "epoch": 0.017632448969521524, "grad_norm": 0.6106943488121033, "learning_rate": 9.22771037631912e-05, "loss": 1.5653, "step": 268 }, { "epoch": 0.01769824168955705, "grad_norm": 0.6486127972602844, "learning_rate": 9.221983835916074e-05, "loss": 1.568, "step": 269 }, { "epoch": 0.01776403440959258, "grad_norm": 0.6146793365478516, "learning_rate": 9.21623793325881e-05, "loss": 1.6671, "step": 270 }, { "epoch": 0.017829827129628106, "grad_norm": 0.6328508257865906, "learning_rate": 9.210472694698355e-05, "loss": 1.6419, "step": 271 }, { "epoch": 0.017895619849663636, "grad_norm": 0.6605221033096313, "learning_rate": 9.204688146674418e-05, "loss": 1.7364, "step": 272 }, { "epoch": 0.017961412569699162, "grad_norm": 0.6620165705680847, "learning_rate": 9.198884315715259e-05, "loss": 1.6665, "step": 273 }, { "epoch": 0.018027205289734692, "grad_norm": 0.6743021011352539, "learning_rate": 9.193061228437572e-05, "loss": 1.5406, "step": 274 }, { "epoch": 0.01809299800977022, "grad_norm": 0.6547907590866089, "learning_rate": 9.187218911546362e-05, "loss": 1.5747, "step": 275 }, { "epoch": 0.018158790729805748, "grad_norm": 0.7358336448669434, "learning_rate": 9.18135739183482e-05, "loss": 1.6917, "step": 276 }, { "epoch": 0.018224583449841274, "grad_norm": 0.6380569338798523, "learning_rate": 9.175476696184205e-05, "loss": 1.5399, "step": 277 }, { "epoch": 0.018290376169876804, "grad_norm": 0.7086791396141052, "learning_rate": 9.169576851563715e-05, "loss": 1.6781, "step": 278 }, { "epoch": 0.01835616888991233, "grad_norm": 0.7136445641517639, "learning_rate": 9.163657885030368e-05, "loss": 1.611, "step": 279 }, { "epoch": 0.01842196160994786, "grad_norm": 0.6863766312599182, "learning_rate": 9.157719823728876e-05, "loss": 1.6871, "step": 280 }, { "epoch": 0.018487754329983386, "grad_norm": 0.7350311875343323, "learning_rate": 9.151762694891521e-05, "loss": 1.508, "step": 281 }, { "epoch": 0.018553547050018916, "grad_norm": 0.6730437874794006, "learning_rate": 9.14578652583803e-05, "loss": 1.5196, "step": 282 }, { "epoch": 0.018619339770054442, "grad_norm": 0.718139111995697, "learning_rate": 9.139791343975448e-05, "loss": 1.5038, "step": 283 }, { "epoch": 0.018685132490089972, "grad_norm": 0.7707210183143616, "learning_rate": 9.133777176798013e-05, "loss": 1.729, "step": 284 }, { "epoch": 0.0187509252101255, "grad_norm": 0.7235831618309021, "learning_rate": 9.127744051887035e-05, "loss": 1.565, "step": 285 }, { "epoch": 0.018816717930161028, "grad_norm": 
0.7182489633560181, "learning_rate": 9.121691996910762e-05, "loss": 1.6169, "step": 286 }, { "epoch": 0.018882510650196555, "grad_norm": 0.7400686144828796, "learning_rate": 9.115621039624256e-05, "loss": 1.653, "step": 287 }, { "epoch": 0.018948303370232084, "grad_norm": 0.7060942649841309, "learning_rate": 9.109531207869266e-05, "loss": 1.7122, "step": 288 }, { "epoch": 0.01901409609026761, "grad_norm": 0.7848304510116577, "learning_rate": 9.103422529574104e-05, "loss": 1.7381, "step": 289 }, { "epoch": 0.01907988881030314, "grad_norm": 0.7508149743080139, "learning_rate": 9.09729503275351e-05, "loss": 1.6243, "step": 290 }, { "epoch": 0.019145681530338667, "grad_norm": 0.765107274055481, "learning_rate": 9.091148745508526e-05, "loss": 1.6244, "step": 291 }, { "epoch": 0.019211474250374196, "grad_norm": 0.8006464242935181, "learning_rate": 9.08498369602637e-05, "loss": 1.5571, "step": 292 }, { "epoch": 0.019277266970409723, "grad_norm": 0.7692310214042664, "learning_rate": 9.078799912580304e-05, "loss": 1.6686, "step": 293 }, { "epoch": 0.019343059690445252, "grad_norm": 0.8822973966598511, "learning_rate": 9.072597423529508e-05, "loss": 1.6505, "step": 294 }, { "epoch": 0.01940885241048078, "grad_norm": 0.8218017220497131, "learning_rate": 9.066376257318938e-05, "loss": 1.5566, "step": 295 }, { "epoch": 0.01947464513051631, "grad_norm": 0.8378568887710571, "learning_rate": 9.060136442479215e-05, "loss": 1.5851, "step": 296 }, { "epoch": 0.019540437850551838, "grad_norm": 0.9615349173545837, "learning_rate": 9.053878007626478e-05, "loss": 1.6833, "step": 297 }, { "epoch": 0.019606230570587364, "grad_norm": 0.8999234437942505, "learning_rate": 9.04760098146226e-05, "loss": 1.5815, "step": 298 }, { "epoch": 0.019672023290622894, "grad_norm": 1.0168418884277344, "learning_rate": 9.041305392773354e-05, "loss": 1.7269, "step": 299 }, { "epoch": 0.01973781601065842, "grad_norm": 1.2946468591690063, "learning_rate": 9.034991270431681e-05, "loss": 1.6278, "step": 300 }, { "epoch": 0.01980360873069395, "grad_norm": 0.6013835668563843, "learning_rate": 9.02865864339416e-05, "loss": 1.7835, "step": 301 }, { "epoch": 0.019869401450729476, "grad_norm": 0.6507825255393982, "learning_rate": 9.022307540702576e-05, "loss": 1.8789, "step": 302 }, { "epoch": 0.019935194170765006, "grad_norm": 0.6571053862571716, "learning_rate": 9.015937991483439e-05, "loss": 1.8656, "step": 303 }, { "epoch": 0.020000986890800532, "grad_norm": 0.669532060623169, "learning_rate": 9.009550024947856e-05, "loss": 1.7856, "step": 304 }, { "epoch": 0.020066779610836062, "grad_norm": 0.6802514791488647, "learning_rate": 9.003143670391403e-05, "loss": 1.8545, "step": 305 }, { "epoch": 0.02013257233087159, "grad_norm": 0.6057747602462769, "learning_rate": 8.996718957193978e-05, "loss": 1.6657, "step": 306 }, { "epoch": 0.020198365050907118, "grad_norm": 0.5737309455871582, "learning_rate": 8.990275914819679e-05, "loss": 1.7686, "step": 307 }, { "epoch": 0.020264157770942644, "grad_norm": 0.5848949551582336, "learning_rate": 8.983814572816656e-05, "loss": 1.7321, "step": 308 }, { "epoch": 0.020329950490978174, "grad_norm": 0.5747925043106079, "learning_rate": 8.977334960816986e-05, "loss": 1.6342, "step": 309 }, { "epoch": 0.0203957432110137, "grad_norm": 0.6126972436904907, "learning_rate": 8.970837108536532e-05, "loss": 1.5879, "step": 310 }, { "epoch": 0.02046153593104923, "grad_norm": 0.6280604600906372, "learning_rate": 8.964321045774807e-05, "loss": 1.8228, "step": 311 }, { "epoch": 0.020527328651084756, "grad_norm": 
0.6287879347801208, "learning_rate": 8.957786802414842e-05, "loss": 1.6301, "step": 312 }, { "epoch": 0.020593121371120286, "grad_norm": 0.6327916383743286, "learning_rate": 8.951234408423042e-05, "loss": 1.8064, "step": 313 }, { "epoch": 0.020658914091155813, "grad_norm": 0.6169136166572571, "learning_rate": 8.944663893849052e-05, "loss": 1.627, "step": 314 }, { "epoch": 0.020724706811191342, "grad_norm": 0.7315105199813843, "learning_rate": 8.938075288825622e-05, "loss": 1.5807, "step": 315 }, { "epoch": 0.02079049953122687, "grad_norm": 0.6365809440612793, "learning_rate": 8.93146862356846e-05, "loss": 1.775, "step": 316 }, { "epoch": 0.0208562922512624, "grad_norm": 0.6544710993766785, "learning_rate": 8.924843928376104e-05, "loss": 1.7399, "step": 317 }, { "epoch": 0.020922084971297925, "grad_norm": 0.5984869599342346, "learning_rate": 8.91820123362978e-05, "loss": 1.5659, "step": 318 }, { "epoch": 0.020987877691333454, "grad_norm": 0.6417421698570251, "learning_rate": 8.911540569793253e-05, "loss": 1.8156, "step": 319 }, { "epoch": 0.02105367041136898, "grad_norm": 0.6159281134605408, "learning_rate": 8.904861967412703e-05, "loss": 1.6519, "step": 320 }, { "epoch": 0.02111946313140451, "grad_norm": 0.6258522868156433, "learning_rate": 8.898165457116574e-05, "loss": 1.6219, "step": 321 }, { "epoch": 0.021185255851440037, "grad_norm": 0.6070511937141418, "learning_rate": 8.891451069615437e-05, "loss": 1.5032, "step": 322 }, { "epoch": 0.021251048571475566, "grad_norm": 0.6364402770996094, "learning_rate": 8.884718835701848e-05, "loss": 1.5849, "step": 323 }, { "epoch": 0.021316841291511093, "grad_norm": 0.6753417253494263, "learning_rate": 8.877968786250212e-05, "loss": 1.6483, "step": 324 }, { "epoch": 0.021382634011546622, "grad_norm": 0.6451675295829773, "learning_rate": 8.87120095221663e-05, "loss": 1.6688, "step": 325 }, { "epoch": 0.021448426731582152, "grad_norm": 0.6772330403327942, "learning_rate": 8.86441536463877e-05, "loss": 1.6518, "step": 326 }, { "epoch": 0.02151421945161768, "grad_norm": 0.6775709986686707, "learning_rate": 8.857612054635713e-05, "loss": 1.6329, "step": 327 }, { "epoch": 0.021580012171653208, "grad_norm": 0.7176543474197388, "learning_rate": 8.850791053407824e-05, "loss": 1.7176, "step": 328 }, { "epoch": 0.021645804891688734, "grad_norm": 0.8762123584747314, "learning_rate": 8.843952392236594e-05, "loss": 1.7178, "step": 329 }, { "epoch": 0.021711597611724264, "grad_norm": 0.680343508720398, "learning_rate": 8.837096102484508e-05, "loss": 1.7253, "step": 330 }, { "epoch": 0.02177739033175979, "grad_norm": 0.6655333042144775, "learning_rate": 8.83022221559489e-05, "loss": 1.5397, "step": 331 }, { "epoch": 0.02184318305179532, "grad_norm": 0.7247670292854309, "learning_rate": 8.823330763091775e-05, "loss": 1.5323, "step": 332 }, { "epoch": 0.021908975771830846, "grad_norm": 0.6783066987991333, "learning_rate": 8.816421776579749e-05, "loss": 1.578, "step": 333 }, { "epoch": 0.021974768491866376, "grad_norm": 0.7266553640365601, "learning_rate": 8.80949528774381e-05, "loss": 1.449, "step": 334 }, { "epoch": 0.022040561211901902, "grad_norm": 0.7339120507240295, "learning_rate": 8.802551328349222e-05, "loss": 1.5265, "step": 335 }, { "epoch": 0.022106353931937432, "grad_norm": 0.703031063079834, "learning_rate": 8.795589930241374e-05, "loss": 1.4054, "step": 336 }, { "epoch": 0.02217214665197296, "grad_norm": 0.8293249011039734, "learning_rate": 8.788611125345625e-05, "loss": 1.613, "step": 337 }, { "epoch": 0.022237939372008488, "grad_norm": 
0.8584293127059937, "learning_rate": 8.781614945667169e-05, "loss": 1.693, "step": 338 }, { "epoch": 0.022303732092044014, "grad_norm": 0.7650991082191467, "learning_rate": 8.774601423290875e-05, "loss": 1.6419, "step": 339 }, { "epoch": 0.022369524812079544, "grad_norm": 0.8328055143356323, "learning_rate": 8.767570590381148e-05, "loss": 1.6992, "step": 340 }, { "epoch": 0.02243531753211507, "grad_norm": 0.8707652688026428, "learning_rate": 8.760522479181784e-05, "loss": 1.8145, "step": 341 }, { "epoch": 0.0225011102521506, "grad_norm": 0.8000150918960571, "learning_rate": 8.753457122015812e-05, "loss": 1.6967, "step": 342 }, { "epoch": 0.022566902972186127, "grad_norm": 0.7786499261856079, "learning_rate": 8.746374551285358e-05, "loss": 1.5119, "step": 343 }, { "epoch": 0.022632695692221656, "grad_norm": 0.8160056471824646, "learning_rate": 8.73927479947149e-05, "loss": 1.451, "step": 344 }, { "epoch": 0.022698488412257183, "grad_norm": 0.8968502283096313, "learning_rate": 8.732157899134063e-05, "loss": 1.5002, "step": 345 }, { "epoch": 0.022764281132292712, "grad_norm": 0.774455726146698, "learning_rate": 8.725023882911583e-05, "loss": 1.6271, "step": 346 }, { "epoch": 0.02283007385232824, "grad_norm": 0.8428329229354858, "learning_rate": 8.717872783521047e-05, "loss": 1.5726, "step": 347 }, { "epoch": 0.02289586657236377, "grad_norm": 0.972919225692749, "learning_rate": 8.710704633757796e-05, "loss": 1.4679, "step": 348 }, { "epoch": 0.022961659292399295, "grad_norm": 1.157837152481079, "learning_rate": 8.70351946649537e-05, "loss": 1.4656, "step": 349 }, { "epoch": 0.023027452012434824, "grad_norm": 1.2611714601516724, "learning_rate": 8.696317314685341e-05, "loss": 1.428, "step": 350 }, { "epoch": 0.02309324473247035, "grad_norm": 0.6036838889122009, "learning_rate": 8.689098211357187e-05, "loss": 1.7917, "step": 351 }, { "epoch": 0.02315903745250588, "grad_norm": 0.6515650153160095, "learning_rate": 8.681862189618118e-05, "loss": 1.7356, "step": 352 }, { "epoch": 0.023224830172541407, "grad_norm": 0.643792450428009, "learning_rate": 8.674609282652934e-05, "loss": 1.8185, "step": 353 }, { "epoch": 0.023290622892576936, "grad_norm": 0.6375747323036194, "learning_rate": 8.667339523723875e-05, "loss": 1.7393, "step": 354 }, { "epoch": 0.023356415612612466, "grad_norm": 0.601500391960144, "learning_rate": 8.660052946170459e-05, "loss": 1.6424, "step": 355 }, { "epoch": 0.023422208332647992, "grad_norm": 0.584318220615387, "learning_rate": 8.65274958340934e-05, "loss": 1.6143, "step": 356 }, { "epoch": 0.023488001052683522, "grad_norm": 0.5999240875244141, "learning_rate": 8.645429468934147e-05, "loss": 1.7236, "step": 357 }, { "epoch": 0.02355379377271905, "grad_norm": 0.6196637153625488, "learning_rate": 8.638092636315338e-05, "loss": 1.7524, "step": 358 }, { "epoch": 0.023619586492754578, "grad_norm": 0.6153246164321899, "learning_rate": 8.630739119200035e-05, "loss": 1.5623, "step": 359 }, { "epoch": 0.023685379212790104, "grad_norm": 0.5839650630950928, "learning_rate": 8.623368951311881e-05, "loss": 1.6695, "step": 360 }, { "epoch": 0.023751171932825634, "grad_norm": 0.5994311571121216, "learning_rate": 8.615982166450878e-05, "loss": 1.6348, "step": 361 }, { "epoch": 0.02381696465286116, "grad_norm": 0.6017064452171326, "learning_rate": 8.608578798493236e-05, "loss": 1.6742, "step": 362 }, { "epoch": 0.02388275737289669, "grad_norm": 0.6123796105384827, "learning_rate": 8.601158881391212e-05, "loss": 1.5242, "step": 363 }, { "epoch": 0.023948550092932216, "grad_norm": 
0.5843928456306458, "learning_rate": 8.593722449172965e-05, "loss": 1.5024, "step": 364 }, { "epoch": 0.024014342812967746, "grad_norm": 0.5963286757469177, "learning_rate": 8.586269535942385e-05, "loss": 1.6205, "step": 365 }, { "epoch": 0.024080135533003273, "grad_norm": 0.5980545282363892, "learning_rate": 8.578800175878954e-05, "loss": 1.4792, "step": 366 }, { "epoch": 0.024145928253038802, "grad_norm": 0.6400820016860962, "learning_rate": 8.571314403237572e-05, "loss": 1.7103, "step": 367 }, { "epoch": 0.02421172097307433, "grad_norm": 0.6095431447029114, "learning_rate": 8.563812252348411e-05, "loss": 1.4906, "step": 368 }, { "epoch": 0.02427751369310986, "grad_norm": 0.6984130144119263, "learning_rate": 8.556293757616757e-05, "loss": 1.8139, "step": 369 }, { "epoch": 0.024343306413145385, "grad_norm": 0.6309308409690857, "learning_rate": 8.548758953522849e-05, "loss": 1.6624, "step": 370 }, { "epoch": 0.024409099133180914, "grad_norm": 0.5984780788421631, "learning_rate": 8.541207874621718e-05, "loss": 1.5398, "step": 371 }, { "epoch": 0.02447489185321644, "grad_norm": 0.7127171754837036, "learning_rate": 8.533640555543034e-05, "loss": 1.8378, "step": 372 }, { "epoch": 0.02454068457325197, "grad_norm": 0.6665642857551575, "learning_rate": 8.526057030990947e-05, "loss": 1.5849, "step": 373 }, { "epoch": 0.024606477293287497, "grad_norm": 0.6169716715812683, "learning_rate": 8.518457335743926e-05, "loss": 1.6305, "step": 374 }, { "epoch": 0.024672270013323026, "grad_norm": 0.6478058099746704, "learning_rate": 8.510841504654596e-05, "loss": 1.4958, "step": 375 }, { "epoch": 0.024738062733358553, "grad_norm": 0.672910749912262, "learning_rate": 8.50320957264959e-05, "loss": 1.7338, "step": 376 }, { "epoch": 0.024803855453394082, "grad_norm": 0.7039096355438232, "learning_rate": 8.495561574729369e-05, "loss": 1.5893, "step": 377 }, { "epoch": 0.02486964817342961, "grad_norm": 0.6732673645019531, "learning_rate": 8.487897545968084e-05, "loss": 1.6202, "step": 378 }, { "epoch": 0.02493544089346514, "grad_norm": 0.6896900534629822, "learning_rate": 8.480217521513399e-05, "loss": 1.6855, "step": 379 }, { "epoch": 0.025001233613500665, "grad_norm": 0.6888880729675293, "learning_rate": 8.472521536586335e-05, "loss": 1.7113, "step": 380 }, { "epoch": 0.025067026333536194, "grad_norm": 0.7276954650878906, "learning_rate": 8.464809626481111e-05, "loss": 1.7445, "step": 381 }, { "epoch": 0.02513281905357172, "grad_norm": 0.6776707172393799, "learning_rate": 8.457081826564979e-05, "loss": 1.5922, "step": 382 }, { "epoch": 0.02519861177360725, "grad_norm": 0.6573496460914612, "learning_rate": 8.449338172278059e-05, "loss": 1.6475, "step": 383 }, { "epoch": 0.02526440449364278, "grad_norm": 0.7136342525482178, "learning_rate": 8.441578699133185e-05, "loss": 1.7994, "step": 384 }, { "epoch": 0.025330197213678306, "grad_norm": 0.702238917350769, "learning_rate": 8.433803442715735e-05, "loss": 1.5636, "step": 385 }, { "epoch": 0.025395989933713836, "grad_norm": 0.754508376121521, "learning_rate": 8.426012438683473e-05, "loss": 1.7096, "step": 386 }, { "epoch": 0.025461782653749362, "grad_norm": 0.6970518231391907, "learning_rate": 8.418205722766374e-05, "loss": 1.6868, "step": 387 }, { "epoch": 0.025527575373784892, "grad_norm": 0.7553730010986328, "learning_rate": 8.410383330766478e-05, "loss": 1.7047, "step": 388 }, { "epoch": 0.02559336809382042, "grad_norm": 0.7503629326820374, "learning_rate": 8.402545298557712e-05, "loss": 1.7693, "step": 389 }, { "epoch": 0.025659160813855948, "grad_norm": 
0.8078016638755798, "learning_rate": 8.394691662085731e-05, "loss": 1.665, "step": 390 }, { "epoch": 0.025724953533891474, "grad_norm": 0.7687540054321289, "learning_rate": 8.38682245736775e-05, "loss": 1.6099, "step": 391 }, { "epoch": 0.025790746253927004, "grad_norm": 0.8783352375030518, "learning_rate": 8.378937720492384e-05, "loss": 1.8321, "step": 392 }, { "epoch": 0.02585653897396253, "grad_norm": 0.7511425614356995, "learning_rate": 8.371037487619477e-05, "loss": 1.532, "step": 393 }, { "epoch": 0.02592233169399806, "grad_norm": 0.8450592756271362, "learning_rate": 8.363121794979938e-05, "loss": 1.5987, "step": 394 }, { "epoch": 0.025988124414033587, "grad_norm": 0.9435873031616211, "learning_rate": 8.355190678875578e-05, "loss": 1.6183, "step": 395 }, { "epoch": 0.026053917134069116, "grad_norm": 0.8024054169654846, "learning_rate": 8.347244175678938e-05, "loss": 1.6093, "step": 396 }, { "epoch": 0.026119709854104643, "grad_norm": 0.8227169513702393, "learning_rate": 8.33928232183313e-05, "loss": 1.4607, "step": 397 }, { "epoch": 0.026185502574140172, "grad_norm": 0.9674575328826904, "learning_rate": 8.331305153851658e-05, "loss": 1.5625, "step": 398 }, { "epoch": 0.0262512952941757, "grad_norm": 0.908593475818634, "learning_rate": 8.323312708318262e-05, "loss": 1.5447, "step": 399 }, { "epoch": 0.02631708801421123, "grad_norm": 1.2496719360351562, "learning_rate": 8.315305021886746e-05, "loss": 1.3417, "step": 400 }, { "epoch": 0.026382880734246755, "grad_norm": 0.5647311806678772, "learning_rate": 8.307282131280804e-05, "loss": 1.715, "step": 401 }, { "epoch": 0.026448673454282284, "grad_norm": 0.5769903063774109, "learning_rate": 8.299244073293866e-05, "loss": 1.7535, "step": 402 }, { "epoch": 0.02651446617431781, "grad_norm": 0.615765392780304, "learning_rate": 8.291190884788915e-05, "loss": 1.7596, "step": 403 }, { "epoch": 0.02658025889435334, "grad_norm": 0.6222158074378967, "learning_rate": 8.283122602698323e-05, "loss": 1.78, "step": 404 }, { "epoch": 0.026646051614388867, "grad_norm": 0.616362452507019, "learning_rate": 8.275039264023683e-05, "loss": 1.6605, "step": 405 }, { "epoch": 0.026711844334424396, "grad_norm": 0.63614422082901, "learning_rate": 8.26694090583564e-05, "loss": 1.6457, "step": 406 }, { "epoch": 0.026777637054459923, "grad_norm": 0.5714118480682373, "learning_rate": 8.258827565273718e-05, "loss": 1.5967, "step": 407 }, { "epoch": 0.026843429774495452, "grad_norm": 0.5620883107185364, "learning_rate": 8.250699279546151e-05, "loss": 1.5886, "step": 408 }, { "epoch": 0.02690922249453098, "grad_norm": 0.593058168888092, "learning_rate": 8.242556085929711e-05, "loss": 1.7853, "step": 409 }, { "epoch": 0.02697501521456651, "grad_norm": 0.6109753251075745, "learning_rate": 8.23439802176954e-05, "loss": 1.8228, "step": 410 }, { "epoch": 0.027040807934602035, "grad_norm": 0.5662338733673096, "learning_rate": 8.226225124478979e-05, "loss": 1.7658, "step": 411 }, { "epoch": 0.027106600654637564, "grad_norm": 0.586213231086731, "learning_rate": 8.218037431539391e-05, "loss": 1.7126, "step": 412 }, { "epoch": 0.02717239337467309, "grad_norm": 0.5898706912994385, "learning_rate": 8.209834980499995e-05, "loss": 1.6058, "step": 413 }, { "epoch": 0.02723818609470862, "grad_norm": 0.6345718502998352, "learning_rate": 8.201617808977689e-05, "loss": 1.6733, "step": 414 }, { "epoch": 0.02730397881474415, "grad_norm": 0.6307375431060791, "learning_rate": 8.193385954656883e-05, "loss": 1.6946, "step": 415 }, { "epoch": 0.027369771534779676, "grad_norm": 0.6170186996459961, 
"learning_rate": 8.185139455289322e-05, "loss": 1.6548, "step": 416 }, { "epoch": 0.027435564254815206, "grad_norm": 0.6755112409591675, "learning_rate": 8.17687834869391e-05, "loss": 1.814, "step": 417 }, { "epoch": 0.027501356974850732, "grad_norm": 0.636673629283905, "learning_rate": 8.16860267275655e-05, "loss": 1.7344, "step": 418 }, { "epoch": 0.027567149694886262, "grad_norm": 0.6432463526725769, "learning_rate": 8.160312465429952e-05, "loss": 1.5055, "step": 419 }, { "epoch": 0.02763294241492179, "grad_norm": 0.6000040769577026, "learning_rate": 8.152007764733471e-05, "loss": 1.4705, "step": 420 }, { "epoch": 0.027698735134957318, "grad_norm": 0.7080824375152588, "learning_rate": 8.14368860875293e-05, "loss": 1.7144, "step": 421 }, { "epoch": 0.027764527854992845, "grad_norm": 0.6095240712165833, "learning_rate": 8.135355035640444e-05, "loss": 1.5537, "step": 422 }, { "epoch": 0.027830320575028374, "grad_norm": 0.684853196144104, "learning_rate": 8.127007083614245e-05, "loss": 1.5419, "step": 423 }, { "epoch": 0.0278961132950639, "grad_norm": 0.6135299205780029, "learning_rate": 8.118644790958509e-05, "loss": 1.5624, "step": 424 }, { "epoch": 0.02796190601509943, "grad_norm": 0.6529998779296875, "learning_rate": 8.110268196023179e-05, "loss": 1.6981, "step": 425 }, { "epoch": 0.028027698735134957, "grad_norm": 0.7151209115982056, "learning_rate": 8.101877337223786e-05, "loss": 1.6974, "step": 426 }, { "epoch": 0.028093491455170486, "grad_norm": 0.6477805972099304, "learning_rate": 8.093472253041281e-05, "loss": 1.6776, "step": 427 }, { "epoch": 0.028159284175206013, "grad_norm": 0.6722093224525452, "learning_rate": 8.085052982021847e-05, "loss": 1.6938, "step": 428 }, { "epoch": 0.028225076895241542, "grad_norm": 0.6491847038269043, "learning_rate": 8.076619562776737e-05, "loss": 1.638, "step": 429 }, { "epoch": 0.02829086961527707, "grad_norm": 0.6507553458213806, "learning_rate": 8.06817203398208e-05, "loss": 1.5585, "step": 430 }, { "epoch": 0.0283566623353126, "grad_norm": 0.6638416051864624, "learning_rate": 8.059710434378715e-05, "loss": 1.5053, "step": 431 }, { "epoch": 0.028422455055348125, "grad_norm": 0.6749764680862427, "learning_rate": 8.051234802772017e-05, "loss": 1.6681, "step": 432 }, { "epoch": 0.028488247775383654, "grad_norm": 0.6888801455497742, "learning_rate": 8.042745178031702e-05, "loss": 1.5055, "step": 433 }, { "epoch": 0.02855404049541918, "grad_norm": 0.6788802742958069, "learning_rate": 8.034241599091665e-05, "loss": 1.6874, "step": 434 }, { "epoch": 0.02861983321545471, "grad_norm": 0.6617124676704407, "learning_rate": 8.025724104949799e-05, "loss": 1.5684, "step": 435 }, { "epoch": 0.028685625935490237, "grad_norm": 0.6838613748550415, "learning_rate": 8.017192734667802e-05, "loss": 1.5228, "step": 436 }, { "epoch": 0.028751418655525766, "grad_norm": 0.7063144445419312, "learning_rate": 8.008647527371023e-05, "loss": 1.476, "step": 437 }, { "epoch": 0.028817211375561293, "grad_norm": 0.7079107761383057, "learning_rate": 8.000088522248255e-05, "loss": 1.4906, "step": 438 }, { "epoch": 0.028883004095596822, "grad_norm": 0.7689743041992188, "learning_rate": 7.991515758551577e-05, "loss": 1.5851, "step": 439 }, { "epoch": 0.02894879681563235, "grad_norm": 0.807818591594696, "learning_rate": 7.982929275596166e-05, "loss": 1.6402, "step": 440 }, { "epoch": 0.02901458953566788, "grad_norm": 0.7465726733207703, "learning_rate": 7.97432911276011e-05, "loss": 1.719, "step": 441 }, { "epoch": 0.029080382255703405, "grad_norm": 0.7983351945877075, 
"learning_rate": 7.965715309484237e-05, "loss": 1.6622, "step": 442 }, { "epoch": 0.029146174975738934, "grad_norm": 0.8748970031738281, "learning_rate": 7.957087905271934e-05, "loss": 1.5992, "step": 443 }, { "epoch": 0.029211967695774464, "grad_norm": 0.7884351015090942, "learning_rate": 7.948446939688956e-05, "loss": 1.4298, "step": 444 }, { "epoch": 0.02927776041580999, "grad_norm": 0.7786914110183716, "learning_rate": 7.939792452363259e-05, "loss": 1.4742, "step": 445 }, { "epoch": 0.02934355313584552, "grad_norm": 0.8521863222122192, "learning_rate": 7.931124482984802e-05, "loss": 1.6724, "step": 446 }, { "epoch": 0.029409345855881047, "grad_norm": 0.8581932187080383, "learning_rate": 7.92244307130538e-05, "loss": 1.4776, "step": 447 }, { "epoch": 0.029475138575916576, "grad_norm": 0.8381912112236023, "learning_rate": 7.913748257138434e-05, "loss": 1.3711, "step": 448 }, { "epoch": 0.029540931295952103, "grad_norm": 0.9005985260009766, "learning_rate": 7.905040080358868e-05, "loss": 1.4701, "step": 449 }, { "epoch": 0.029606724015987632, "grad_norm": 1.295373558998108, "learning_rate": 7.896318580902867e-05, "loss": 1.3209, "step": 450 }, { "epoch": 0.02967251673602316, "grad_norm": 0.5462653040885925, "learning_rate": 7.887583798767717e-05, "loss": 1.716, "step": 451 }, { "epoch": 0.02973830945605869, "grad_norm": 0.5886077284812927, "learning_rate": 7.878835774011615e-05, "loss": 1.7081, "step": 452 }, { "epoch": 0.029804102176094215, "grad_norm": 0.5938754081726074, "learning_rate": 7.870074546753497e-05, "loss": 1.714, "step": 453 }, { "epoch": 0.029869894896129744, "grad_norm": 0.6293351054191589, "learning_rate": 7.861300157172837e-05, "loss": 1.7798, "step": 454 }, { "epoch": 0.02993568761616527, "grad_norm": 0.5989140868186951, "learning_rate": 7.85251264550948e-05, "loss": 1.7753, "step": 455 }, { "epoch": 0.0300014803362008, "grad_norm": 0.5683930516242981, "learning_rate": 7.843712052063446e-05, "loss": 1.5767, "step": 456 }, { "epoch": 0.030067273056236327, "grad_norm": 0.6076240539550781, "learning_rate": 7.834898417194747e-05, "loss": 1.737, "step": 457 }, { "epoch": 0.030133065776271856, "grad_norm": 0.6135837435722351, "learning_rate": 7.826071781323207e-05, "loss": 1.7466, "step": 458 }, { "epoch": 0.030198858496307383, "grad_norm": 0.6038995981216431, "learning_rate": 7.817232184928276e-05, "loss": 1.5442, "step": 459 }, { "epoch": 0.030264651216342912, "grad_norm": 0.6276111602783203, "learning_rate": 7.808379668548834e-05, "loss": 1.6534, "step": 460 }, { "epoch": 0.03033044393637844, "grad_norm": 0.6474449634552002, "learning_rate": 7.799514272783014e-05, "loss": 1.8461, "step": 461 }, { "epoch": 0.03039623665641397, "grad_norm": 0.6045409440994263, "learning_rate": 7.790636038288023e-05, "loss": 1.6837, "step": 462 }, { "epoch": 0.030462029376449495, "grad_norm": 0.583416223526001, "learning_rate": 7.781745005779938e-05, "loss": 1.7384, "step": 463 }, { "epoch": 0.030527822096485024, "grad_norm": 0.6061449646949768, "learning_rate": 7.772841216033533e-05, "loss": 1.6841, "step": 464 }, { "epoch": 0.03059361481652055, "grad_norm": 0.6019045114517212, "learning_rate": 7.763924709882086e-05, "loss": 1.6674, "step": 465 }, { "epoch": 0.03065940753655608, "grad_norm": 0.6233644485473633, "learning_rate": 7.754995528217194e-05, "loss": 1.6055, "step": 466 }, { "epoch": 0.030725200256591607, "grad_norm": 0.6329379081726074, "learning_rate": 7.746053711988583e-05, "loss": 1.7531, "step": 467 }, { "epoch": 0.030790992976627136, "grad_norm": 0.6121736168861389, 
"learning_rate": 7.737099302203923e-05, "loss": 1.5287, "step": 468 }, { "epoch": 0.030856785696662663, "grad_norm": 0.6427604556083679, "learning_rate": 7.728132339928638e-05, "loss": 1.8374, "step": 469 }, { "epoch": 0.030922578416698192, "grad_norm": 0.659582793712616, "learning_rate": 7.719152866285721e-05, "loss": 1.6771, "step": 470 }, { "epoch": 0.03098837113673372, "grad_norm": 0.6290009021759033, "learning_rate": 7.710160922455539e-05, "loss": 1.7209, "step": 471 }, { "epoch": 0.03105416385676925, "grad_norm": 0.6897339224815369, "learning_rate": 7.70115654967565e-05, "loss": 1.5573, "step": 472 }, { "epoch": 0.031119956576804778, "grad_norm": 0.6670756936073303, "learning_rate": 7.692139789240611e-05, "loss": 1.6864, "step": 473 }, { "epoch": 0.031185749296840305, "grad_norm": 0.6782425045967102, "learning_rate": 7.68311068250179e-05, "loss": 1.7276, "step": 474 }, { "epoch": 0.031251542016875834, "grad_norm": 0.6851603388786316, "learning_rate": 7.674069270867181e-05, "loss": 1.6012, "step": 475 }, { "epoch": 0.03131733473691136, "grad_norm": 0.6849592328071594, "learning_rate": 7.665015595801197e-05, "loss": 1.6589, "step": 476 }, { "epoch": 0.03138312745694689, "grad_norm": 0.6817401051521301, "learning_rate": 7.6559496988245e-05, "loss": 1.5357, "step": 477 }, { "epoch": 0.03144892017698242, "grad_norm": 0.6632989645004272, "learning_rate": 7.646871621513807e-05, "loss": 1.6905, "step": 478 }, { "epoch": 0.031514712897017946, "grad_norm": 0.7022443413734436, "learning_rate": 7.637781405501681e-05, "loss": 1.5377, "step": 479 }, { "epoch": 0.03158050561705347, "grad_norm": 0.6468812227249146, "learning_rate": 7.628679092476367e-05, "loss": 1.6535, "step": 480 }, { "epoch": 0.031646298337089, "grad_norm": 0.6711928844451904, "learning_rate": 7.619564724181578e-05, "loss": 1.5319, "step": 481 }, { "epoch": 0.03171209105712453, "grad_norm": 0.8006902933120728, "learning_rate": 7.610438342416319e-05, "loss": 1.6375, "step": 482 }, { "epoch": 0.03177788377716006, "grad_norm": 0.6482203602790833, "learning_rate": 7.601299989034689e-05, "loss": 1.4045, "step": 483 }, { "epoch": 0.031843676497195585, "grad_norm": 0.7363543510437012, "learning_rate": 7.592149705945686e-05, "loss": 1.8084, "step": 484 }, { "epoch": 0.03190946921723111, "grad_norm": 0.7123915553092957, "learning_rate": 7.582987535113023e-05, "loss": 1.5565, "step": 485 }, { "epoch": 0.031975261937266644, "grad_norm": 0.740925133228302, "learning_rate": 7.573813518554925e-05, "loss": 1.5924, "step": 486 }, { "epoch": 0.03204105465730217, "grad_norm": 0.6997798085212708, "learning_rate": 7.56462769834395e-05, "loss": 1.6812, "step": 487 }, { "epoch": 0.0321068473773377, "grad_norm": 0.6777325868606567, "learning_rate": 7.555430116606778e-05, "loss": 1.5262, "step": 488 }, { "epoch": 0.03217264009737322, "grad_norm": 0.7830193638801575, "learning_rate": 7.546220815524036e-05, "loss": 1.5377, "step": 489 }, { "epoch": 0.032238432817408756, "grad_norm": 0.709225594997406, "learning_rate": 7.536999837330099e-05, "loss": 1.6136, "step": 490 }, { "epoch": 0.03230422553744428, "grad_norm": 0.7750362753868103, "learning_rate": 7.527767224312883e-05, "loss": 1.6665, "step": 491 }, { "epoch": 0.03237001825747981, "grad_norm": 0.7182449698448181, "learning_rate": 7.51852301881367e-05, "loss": 1.6616, "step": 492 }, { "epoch": 0.032435810977515335, "grad_norm": 0.7855557799339294, "learning_rate": 7.509267263226905e-05, "loss": 1.4291, "step": 493 }, { "epoch": 0.03250160369755087, "grad_norm": 0.8149724006652832, "learning_rate": 
7.500000000000001e-05, "loss": 1.631, "step": 494 }, { "epoch": 0.032567396417586394, "grad_norm": 0.8115766644477844, "learning_rate": 7.490721271633145e-05, "loss": 1.5897, "step": 495 }, { "epoch": 0.03263318913762192, "grad_norm": 0.7797816395759583, "learning_rate": 7.481431120679106e-05, "loss": 1.5576, "step": 496 }, { "epoch": 0.03269898185765745, "grad_norm": 0.8388152718544006, "learning_rate": 7.472129589743033e-05, "loss": 1.5128, "step": 497 }, { "epoch": 0.03276477457769298, "grad_norm": 0.8746300935745239, "learning_rate": 7.462816721482274e-05, "loss": 1.3431, "step": 498 }, { "epoch": 0.032830567297728507, "grad_norm": 0.9929096102714539, "learning_rate": 7.453492558606157e-05, "loss": 1.6894, "step": 499 }, { "epoch": 0.03289636001776403, "grad_norm": 1.0941029787063599, "learning_rate": 7.44415714387582e-05, "loss": 1.122, "step": 500 }, { "epoch": 0.03296215273779956, "grad_norm": 0.5158244967460632, "learning_rate": 7.434810520103995e-05, "loss": 1.6916, "step": 501 }, { "epoch": 0.03302794545783509, "grad_norm": 0.5751031637191772, "learning_rate": 7.425452730154823e-05, "loss": 1.6563, "step": 502 }, { "epoch": 0.03309373817787062, "grad_norm": 0.6221696138381958, "learning_rate": 7.416083816943653e-05, "loss": 1.6673, "step": 503 }, { "epoch": 0.033159530897906145, "grad_norm": 0.6054139137268066, "learning_rate": 7.406703823436845e-05, "loss": 1.7057, "step": 504 }, { "epoch": 0.03322532361794168, "grad_norm": 0.6290459036827087, "learning_rate": 7.397312792651571e-05, "loss": 1.8151, "step": 505 }, { "epoch": 0.033291116337977204, "grad_norm": 0.5580724477767944, "learning_rate": 7.38791076765563e-05, "loss": 1.4864, "step": 506 }, { "epoch": 0.03335690905801273, "grad_norm": 0.5801160931587219, "learning_rate": 7.378497791567232e-05, "loss": 1.7467, "step": 507 }, { "epoch": 0.03342270177804826, "grad_norm": 0.5747870206832886, "learning_rate": 7.36907390755481e-05, "loss": 1.7234, "step": 508 }, { "epoch": 0.03348849449808379, "grad_norm": 0.5852952599525452, "learning_rate": 7.359639158836828e-05, "loss": 1.7107, "step": 509 }, { "epoch": 0.033554287218119316, "grad_norm": 0.5938292145729065, "learning_rate": 7.350193588681569e-05, "loss": 1.7132, "step": 510 }, { "epoch": 0.03362007993815484, "grad_norm": 0.5472443103790283, "learning_rate": 7.340737240406945e-05, "loss": 1.6635, "step": 511 }, { "epoch": 0.03368587265819037, "grad_norm": 0.5958325862884521, "learning_rate": 7.331270157380303e-05, "loss": 1.8009, "step": 512 }, { "epoch": 0.0337516653782259, "grad_norm": 0.5720498561859131, "learning_rate": 7.321792383018213e-05, "loss": 1.6974, "step": 513 }, { "epoch": 0.03381745809826143, "grad_norm": 0.5843756198883057, "learning_rate": 7.312303960786278e-05, "loss": 1.5999, "step": 514 }, { "epoch": 0.033883250818296955, "grad_norm": 0.5933427810668945, "learning_rate": 7.302804934198936e-05, "loss": 1.7271, "step": 515 }, { "epoch": 0.03394904353833248, "grad_norm": 0.5814589858055115, "learning_rate": 7.293295346819253e-05, "loss": 1.5918, "step": 516 }, { "epoch": 0.034014836258368014, "grad_norm": 0.6152416467666626, "learning_rate": 7.283775242258728e-05, "loss": 1.7409, "step": 517 }, { "epoch": 0.03408062897840354, "grad_norm": 0.5571946501731873, "learning_rate": 7.274244664177097e-05, "loss": 1.59, "step": 518 }, { "epoch": 0.03414642169843907, "grad_norm": 0.7412462830543518, "learning_rate": 7.264703656282122e-05, "loss": 1.7138, "step": 519 }, { "epoch": 0.03421221441847459, "grad_norm": 0.6679773330688477, "learning_rate": 
7.2551522623294e-05, "loss": 1.7234, "step": 520 }, { "epoch": 0.034278007138510126, "grad_norm": 0.6583961844444275, "learning_rate": 7.245590526122159e-05, "loss": 1.7472, "step": 521 }, { "epoch": 0.03434379985854565, "grad_norm": 0.6237342953681946, "learning_rate": 7.236018491511053e-05, "loss": 1.5379, "step": 522 }, { "epoch": 0.03440959257858118, "grad_norm": 0.601783275604248, "learning_rate": 7.226436202393973e-05, "loss": 1.5308, "step": 523 }, { "epoch": 0.034475385298616705, "grad_norm": 0.6741359829902649, "learning_rate": 7.216843702715831e-05, "loss": 1.65, "step": 524 }, { "epoch": 0.03454117801865224, "grad_norm": 0.6531434059143066, "learning_rate": 7.207241036468368e-05, "loss": 1.577, "step": 525 }, { "epoch": 0.034606970738687765, "grad_norm": 0.6689597964286804, "learning_rate": 7.197628247689951e-05, "loss": 1.6195, "step": 526 }, { "epoch": 0.03467276345872329, "grad_norm": 0.6538329124450684, "learning_rate": 7.188005380465364e-05, "loss": 1.5067, "step": 527 }, { "epoch": 0.03473855617875882, "grad_norm": 0.6358965635299683, "learning_rate": 7.178372478925617e-05, "loss": 1.6048, "step": 528 }, { "epoch": 0.03480434889879435, "grad_norm": 0.6213113069534302, "learning_rate": 7.168729587247735e-05, "loss": 1.5352, "step": 529 }, { "epoch": 0.03487014161882988, "grad_norm": 0.656021237373352, "learning_rate": 7.159076749654559e-05, "loss": 1.6022, "step": 530 }, { "epoch": 0.0349359343388654, "grad_norm": 0.7110906839370728, "learning_rate": 7.149414010414541e-05, "loss": 1.6512, "step": 531 }, { "epoch": 0.035001727058900936, "grad_norm": 0.6649793982505798, "learning_rate": 7.139741413841548e-05, "loss": 1.5512, "step": 532 }, { "epoch": 0.03506751977893646, "grad_norm": 0.6741034388542175, "learning_rate": 7.130059004294647e-05, "loss": 1.5785, "step": 533 }, { "epoch": 0.03513331249897199, "grad_norm": 0.7132993340492249, "learning_rate": 7.12036682617791e-05, "loss": 1.6398, "step": 534 }, { "epoch": 0.035199105219007515, "grad_norm": 0.6859109401702881, "learning_rate": 7.110664923940209e-05, "loss": 1.6834, "step": 535 }, { "epoch": 0.03526489793904305, "grad_norm": 0.7714479565620422, "learning_rate": 7.10095334207501e-05, "loss": 1.537, "step": 536 }, { "epoch": 0.035330690659078574, "grad_norm": 0.6838414072990417, "learning_rate": 7.091232125120171e-05, "loss": 1.4921, "step": 537 }, { "epoch": 0.0353964833791141, "grad_norm": 0.7683415412902832, "learning_rate": 7.081501317657739e-05, "loss": 1.5517, "step": 538 }, { "epoch": 0.03546227609914963, "grad_norm": 0.8450960516929626, "learning_rate": 7.07176096431374e-05, "loss": 1.7357, "step": 539 }, { "epoch": 0.03552806881918516, "grad_norm": 0.7302371859550476, "learning_rate": 7.06201110975798e-05, "loss": 1.5579, "step": 540 }, { "epoch": 0.035593861539220686, "grad_norm": 0.7548546195030212, "learning_rate": 7.052251798703838e-05, "loss": 1.4625, "step": 541 }, { "epoch": 0.03565965425925621, "grad_norm": 0.7929666638374329, "learning_rate": 7.042483075908062e-05, "loss": 1.6235, "step": 542 }, { "epoch": 0.03572544697929174, "grad_norm": 0.719232976436615, "learning_rate": 7.032704986170559e-05, "loss": 1.4643, "step": 543 }, { "epoch": 0.03579123969932727, "grad_norm": 0.791790246963501, "learning_rate": 7.022917574334199e-05, "loss": 1.6799, "step": 544 }, { "epoch": 0.0358570324193628, "grad_norm": 0.8136597275733948, "learning_rate": 7.013120885284598e-05, "loss": 1.5832, "step": 545 }, { "epoch": 0.035922825139398325, "grad_norm": 0.8112201690673828, "learning_rate": 7.003314963949922e-05, 
"loss": 1.6149, "step": 546 }, { "epoch": 0.03598861785943385, "grad_norm": 0.8147010207176208, "learning_rate": 6.993499855300673e-05, "loss": 1.5676, "step": 547 }, { "epoch": 0.036054410579469384, "grad_norm": 1.0767403841018677, "learning_rate": 6.983675604349493e-05, "loss": 1.742, "step": 548 }, { "epoch": 0.03612020329950491, "grad_norm": 0.9000462889671326, "learning_rate": 6.973842256150941e-05, "loss": 1.3218, "step": 549 }, { "epoch": 0.03618599601954044, "grad_norm": 1.4560507535934448, "learning_rate": 6.963999855801308e-05, "loss": 1.486, "step": 550 }, { "epoch": 0.03625178873957596, "grad_norm": 0.5187786221504211, "learning_rate": 6.954148448438389e-05, "loss": 1.6111, "step": 551 }, { "epoch": 0.036317581459611496, "grad_norm": 0.5974017381668091, "learning_rate": 6.944288079241293e-05, "loss": 1.7928, "step": 552 }, { "epoch": 0.03638337417964702, "grad_norm": 0.5858004689216614, "learning_rate": 6.934418793430221e-05, "loss": 1.6127, "step": 553 }, { "epoch": 0.03644916689968255, "grad_norm": 0.5808974504470825, "learning_rate": 6.924540636266272e-05, "loss": 1.8917, "step": 554 }, { "epoch": 0.036514959619718075, "grad_norm": 0.5334272980690002, "learning_rate": 6.91465365305123e-05, "loss": 1.4775, "step": 555 }, { "epoch": 0.03658075233975361, "grad_norm": 0.5895127654075623, "learning_rate": 6.90475788912735e-05, "loss": 1.7911, "step": 556 }, { "epoch": 0.036646545059789135, "grad_norm": 0.5641903281211853, "learning_rate": 6.894853389877163e-05, "loss": 1.6109, "step": 557 }, { "epoch": 0.03671233777982466, "grad_norm": 0.5841579437255859, "learning_rate": 6.884940200723255e-05, "loss": 1.5897, "step": 558 }, { "epoch": 0.03677813049986019, "grad_norm": 0.5516168475151062, "learning_rate": 6.875018367128065e-05, "loss": 1.5645, "step": 559 }, { "epoch": 0.03684392321989572, "grad_norm": 0.5693150758743286, "learning_rate": 6.86508793459368e-05, "loss": 1.6858, "step": 560 }, { "epoch": 0.03690971593993125, "grad_norm": 0.5755842924118042, "learning_rate": 6.855148948661616e-05, "loss": 1.69, "step": 561 }, { "epoch": 0.03697550865996677, "grad_norm": 0.5793744325637817, "learning_rate": 6.845201454912621e-05, "loss": 1.5982, "step": 562 }, { "epoch": 0.037041301380002306, "grad_norm": 0.6278342604637146, "learning_rate": 6.835245498966461e-05, "loss": 1.6091, "step": 563 }, { "epoch": 0.03710709410003783, "grad_norm": 0.5617616772651672, "learning_rate": 6.825281126481703e-05, "loss": 1.546, "step": 564 }, { "epoch": 0.03717288682007336, "grad_norm": 0.5803874135017395, "learning_rate": 6.815308383155519e-05, "loss": 1.7211, "step": 565 }, { "epoch": 0.037238679540108885, "grad_norm": 0.5826717019081116, "learning_rate": 6.805327314723468e-05, "loss": 1.5747, "step": 566 }, { "epoch": 0.03730447226014442, "grad_norm": 0.598685622215271, "learning_rate": 6.795337966959289e-05, "loss": 1.5133, "step": 567 }, { "epoch": 0.037370264980179944, "grad_norm": 0.6577322483062744, "learning_rate": 6.785340385674688e-05, "loss": 1.63, "step": 568 }, { "epoch": 0.03743605770021547, "grad_norm": 0.6314491629600525, "learning_rate": 6.775334616719136e-05, "loss": 1.647, "step": 569 }, { "epoch": 0.037501850420251, "grad_norm": 0.6178677678108215, "learning_rate": 6.765320705979646e-05, "loss": 1.5232, "step": 570 }, { "epoch": 0.03756764314028653, "grad_norm": 0.6231119632720947, "learning_rate": 6.755298699380573e-05, "loss": 1.6916, "step": 571 }, { "epoch": 0.037633435860322056, "grad_norm": 0.7361608147621155, "learning_rate": 6.745268642883404e-05, "loss": 1.5717, "step": 
572 }, { "epoch": 0.03769922858035758, "grad_norm": 0.635944128036499, "learning_rate": 6.735230582486537e-05, "loss": 1.5291, "step": 573 }, { "epoch": 0.03776502130039311, "grad_norm": 0.657894492149353, "learning_rate": 6.725184564225081e-05, "loss": 1.583, "step": 574 }, { "epoch": 0.03783081402042864, "grad_norm": 0.6822555661201477, "learning_rate": 6.715130634170635e-05, "loss": 1.5137, "step": 575 }, { "epoch": 0.03789660674046417, "grad_norm": 0.6607404351234436, "learning_rate": 6.705068838431086e-05, "loss": 1.5549, "step": 576 }, { "epoch": 0.037962399460499695, "grad_norm": 0.6624849438667297, "learning_rate": 6.694999223150395e-05, "loss": 1.5693, "step": 577 }, { "epoch": 0.03802819218053522, "grad_norm": 0.7212957143783569, "learning_rate": 6.684921834508379e-05, "loss": 1.6685, "step": 578 }, { "epoch": 0.038093984900570754, "grad_norm": 0.7036564350128174, "learning_rate": 6.674836718720506e-05, "loss": 1.5121, "step": 579 }, { "epoch": 0.03815977762060628, "grad_norm": 0.6782734990119934, "learning_rate": 6.664743922037684e-05, "loss": 1.479, "step": 580 }, { "epoch": 0.03822557034064181, "grad_norm": 0.6969690918922424, "learning_rate": 6.654643490746042e-05, "loss": 1.6161, "step": 581 }, { "epoch": 0.03829136306067733, "grad_norm": 0.7635255455970764, "learning_rate": 6.644535471166724e-05, "loss": 1.7393, "step": 582 }, { "epoch": 0.038357155780712866, "grad_norm": 0.7802557945251465, "learning_rate": 6.634419909655672e-05, "loss": 1.6247, "step": 583 }, { "epoch": 0.03842294850074839, "grad_norm": 0.7853055000305176, "learning_rate": 6.624296852603419e-05, "loss": 1.8486, "step": 584 }, { "epoch": 0.03848874122078392, "grad_norm": 0.699921727180481, "learning_rate": 6.61416634643487e-05, "loss": 1.5465, "step": 585 }, { "epoch": 0.038554533940819445, "grad_norm": 0.7867047190666199, "learning_rate": 6.604028437609095e-05, "loss": 1.6135, "step": 586 }, { "epoch": 0.03862032666085498, "grad_norm": 0.816728949546814, "learning_rate": 6.593883172619111e-05, "loss": 1.7464, "step": 587 }, { "epoch": 0.038686119380890505, "grad_norm": 0.7413901686668396, "learning_rate": 6.583730597991671e-05, "loss": 1.6291, "step": 588 }, { "epoch": 0.03875191210092603, "grad_norm": 0.8019276261329651, "learning_rate": 6.573570760287048e-05, "loss": 1.5239, "step": 589 }, { "epoch": 0.03881770482096156, "grad_norm": 0.7708191275596619, "learning_rate": 6.563403706098833e-05, "loss": 1.6306, "step": 590 }, { "epoch": 0.03888349754099709, "grad_norm": 0.7502703070640564, "learning_rate": 6.553229482053698e-05, "loss": 1.6766, "step": 591 }, { "epoch": 0.03894929026103262, "grad_norm": 0.8003950119018555, "learning_rate": 6.543048134811209e-05, "loss": 1.5102, "step": 592 }, { "epoch": 0.03901508298106814, "grad_norm": 0.8051195740699768, "learning_rate": 6.532859711063594e-05, "loss": 1.4946, "step": 593 }, { "epoch": 0.039080875701103676, "grad_norm": 0.7975060939788818, "learning_rate": 6.522664257535533e-05, "loss": 1.3503, "step": 594 }, { "epoch": 0.0391466684211392, "grad_norm": 0.8413264751434326, "learning_rate": 6.512461820983946e-05, "loss": 1.4129, "step": 595 }, { "epoch": 0.03921246114117473, "grad_norm": 0.94151771068573, "learning_rate": 6.502252448197782e-05, "loss": 1.5474, "step": 596 }, { "epoch": 0.039278253861210255, "grad_norm": 0.9534524083137512, "learning_rate": 6.49203618599779e-05, "loss": 1.7038, "step": 597 }, { "epoch": 0.03934404658124579, "grad_norm": 0.959064781665802, "learning_rate": 6.481813081236328e-05, "loss": 1.5944, "step": 598 }, { "epoch": 
0.039409839301281314, "grad_norm": 1.0186463594436646, "learning_rate": 6.471583180797121e-05, "loss": 1.4035, "step": 599 }, { "epoch": 0.03947563202131684, "grad_norm": 1.1875278949737549, "learning_rate": 6.461346531595065e-05, "loss": 1.4765, "step": 600 }, { "epoch": 0.03954142474135237, "grad_norm": 0.5883246660232544, "learning_rate": 6.45110318057601e-05, "loss": 1.8354, "step": 601 }, { "epoch": 0.0396072174613879, "grad_norm": 0.6155911684036255, "learning_rate": 6.440853174716534e-05, "loss": 1.5164, "step": 602 }, { "epoch": 0.039673010181423427, "grad_norm": 0.5574658513069153, "learning_rate": 6.43059656102374e-05, "loss": 1.6834, "step": 603 }, { "epoch": 0.03973880290145895, "grad_norm": 0.6176509261131287, "learning_rate": 6.420333386535032e-05, "loss": 1.8324, "step": 604 }, { "epoch": 0.03980459562149448, "grad_norm": 0.545872151851654, "learning_rate": 6.410063698317901e-05, "loss": 1.6041, "step": 605 }, { "epoch": 0.03987038834153001, "grad_norm": 0.60647052526474, "learning_rate": 6.399787543469715e-05, "loss": 1.5879, "step": 606 }, { "epoch": 0.03993618106156554, "grad_norm": 0.566210150718689, "learning_rate": 6.389504969117492e-05, "loss": 1.7142, "step": 607 }, { "epoch": 0.040001973781601065, "grad_norm": 0.6033192276954651, "learning_rate": 6.379216022417696e-05, "loss": 1.7099, "step": 608 }, { "epoch": 0.04006776650163659, "grad_norm": 0.5859748721122742, "learning_rate": 6.368920750556012e-05, "loss": 1.7776, "step": 609 }, { "epoch": 0.040133559221672124, "grad_norm": 0.6001906394958496, "learning_rate": 6.358619200747132e-05, "loss": 1.8193, "step": 610 }, { "epoch": 0.04019935194170765, "grad_norm": 0.5797659754753113, "learning_rate": 6.348311420234542e-05, "loss": 1.5826, "step": 611 }, { "epoch": 0.04026514466174318, "grad_norm": 0.5737758874893188, "learning_rate": 6.3379974562903e-05, "loss": 1.7274, "step": 612 }, { "epoch": 0.0403309373817787, "grad_norm": 0.5750417709350586, "learning_rate": 6.32767735621482e-05, "loss": 1.7693, "step": 613 }, { "epoch": 0.040396730101814236, "grad_norm": 0.5750201940536499, "learning_rate": 6.31735116733666e-05, "loss": 1.6943, "step": 614 }, { "epoch": 0.04046252282184976, "grad_norm": 0.6071341633796692, "learning_rate": 6.307018937012303e-05, "loss": 1.6225, "step": 615 }, { "epoch": 0.04052831554188529, "grad_norm": 0.6592499613761902, "learning_rate": 6.296680712625932e-05, "loss": 1.6679, "step": 616 }, { "epoch": 0.040594108261920815, "grad_norm": 0.5857351422309875, "learning_rate": 6.286336541589224e-05, "loss": 1.5893, "step": 617 }, { "epoch": 0.04065990098195635, "grad_norm": 0.606536328792572, "learning_rate": 6.275986471341125e-05, "loss": 1.7409, "step": 618 }, { "epoch": 0.040725693701991875, "grad_norm": 0.5929151773452759, "learning_rate": 6.26563054934764e-05, "loss": 1.6339, "step": 619 }, { "epoch": 0.0407914864220274, "grad_norm": 0.6137556433677673, "learning_rate": 6.255268823101605e-05, "loss": 1.6275, "step": 620 }, { "epoch": 0.040857279142062934, "grad_norm": 0.6244606971740723, "learning_rate": 6.244901340122475e-05, "loss": 1.7192, "step": 621 }, { "epoch": 0.04092307186209846, "grad_norm": 0.6325397491455078, "learning_rate": 6.234528147956108e-05, "loss": 1.6312, "step": 622 }, { "epoch": 0.04098886458213399, "grad_norm": 0.6136852502822876, "learning_rate": 6.224149294174548e-05, "loss": 1.564, "step": 623 }, { "epoch": 0.04105465730216951, "grad_norm": 0.6611918210983276, "learning_rate": 6.213764826375795e-05, "loss": 1.5314, "step": 624 }, { "epoch": 0.041120450022205046, 
"grad_norm": 0.6270449757575989, "learning_rate": 6.203374792183599e-05, "loss": 1.5965, "step": 625 }, { "epoch": 0.04118624274224057, "grad_norm": 0.7064681053161621, "learning_rate": 6.192979239247243e-05, "loss": 1.8145, "step": 626 }, { "epoch": 0.0412520354622761, "grad_norm": 0.6724807024002075, "learning_rate": 6.182578215241311e-05, "loss": 1.6588, "step": 627 }, { "epoch": 0.041317828182311625, "grad_norm": 0.6827772259712219, "learning_rate": 6.172171767865483e-05, "loss": 1.5998, "step": 628 }, { "epoch": 0.04138362090234716, "grad_norm": 0.6520280838012695, "learning_rate": 6.161759944844308e-05, "loss": 1.5482, "step": 629 }, { "epoch": 0.041449413622382685, "grad_norm": 0.6610122323036194, "learning_rate": 6.15134279392699e-05, "loss": 1.5476, "step": 630 }, { "epoch": 0.04151520634241821, "grad_norm": 0.6820929646492004, "learning_rate": 6.140920362887167e-05, "loss": 1.5707, "step": 631 }, { "epoch": 0.04158099906245374, "grad_norm": 0.7021801471710205, "learning_rate": 6.13049269952269e-05, "loss": 1.711, "step": 632 }, { "epoch": 0.04164679178248927, "grad_norm": 0.6803610920906067, "learning_rate": 6.120059851655408e-05, "loss": 1.6353, "step": 633 }, { "epoch": 0.0417125845025248, "grad_norm": 0.6335561275482178, "learning_rate": 6.109621867130944e-05, "loss": 1.4506, "step": 634 }, { "epoch": 0.04177837722256032, "grad_norm": 0.679242730140686, "learning_rate": 6.0991787938184784e-05, "loss": 1.5531, "step": 635 }, { "epoch": 0.04184416994259585, "grad_norm": 0.7108798027038574, "learning_rate": 6.088730679610535e-05, "loss": 1.5429, "step": 636 }, { "epoch": 0.04190996266263138, "grad_norm": 0.6999435424804688, "learning_rate": 6.0782775724227436e-05, "loss": 1.762, "step": 637 }, { "epoch": 0.04197575538266691, "grad_norm": 0.7404870390892029, "learning_rate": 6.067819520193645e-05, "loss": 1.5536, "step": 638 }, { "epoch": 0.042041548102702435, "grad_norm": 0.7356504797935486, "learning_rate": 6.057356570884449e-05, "loss": 1.5618, "step": 639 }, { "epoch": 0.04210734082273796, "grad_norm": 0.7335320115089417, "learning_rate": 6.046888772478828e-05, "loss": 1.557, "step": 640 }, { "epoch": 0.042173133542773494, "grad_norm": 0.7280996441841125, "learning_rate": 6.0364161729826905e-05, "loss": 1.6124, "step": 641 }, { "epoch": 0.04223892626280902, "grad_norm": 0.8570873737335205, "learning_rate": 6.025938820423964e-05, "loss": 1.6069, "step": 642 }, { "epoch": 0.04230471898284455, "grad_norm": 0.7949631810188293, "learning_rate": 6.015456762852374e-05, "loss": 1.5835, "step": 643 }, { "epoch": 0.04237051170288007, "grad_norm": 0.8168470859527588, "learning_rate": 6.004970048339226e-05, "loss": 1.4906, "step": 644 }, { "epoch": 0.042436304422915606, "grad_norm": 0.7664223909378052, "learning_rate": 5.994478724977175e-05, "loss": 1.3437, "step": 645 }, { "epoch": 0.04250209714295113, "grad_norm": 0.7634305357933044, "learning_rate": 5.9839828408800204e-05, "loss": 1.4158, "step": 646 }, { "epoch": 0.04256788986298666, "grad_norm": 0.870861291885376, "learning_rate": 5.973482444182475e-05, "loss": 1.5231, "step": 647 }, { "epoch": 0.042633682583022185, "grad_norm": 0.8543813228607178, "learning_rate": 5.9629775830399424e-05, "loss": 1.3723, "step": 648 }, { "epoch": 0.04269947530305772, "grad_norm": 0.9646958708763123, "learning_rate": 5.952468305628307e-05, "loss": 1.457, "step": 649 }, { "epoch": 0.042765268023093245, "grad_norm": 1.210070252418518, "learning_rate": 5.941954660143703e-05, "loss": 1.4912, "step": 650 }, { "epoch": 0.04283106074312877, "grad_norm": 
0.5520440936088562, "learning_rate": 5.931436694802295e-05, "loss": 1.8551, "step": 651 }, { "epoch": 0.042896853463164304, "grad_norm": 0.5633511543273926, "learning_rate": 5.920914457840063e-05, "loss": 1.696, "step": 652 }, { "epoch": 0.04296264618319983, "grad_norm": 0.5891466736793518, "learning_rate": 5.910387997512573e-05, "loss": 1.6773, "step": 653 }, { "epoch": 0.04302843890323536, "grad_norm": 0.5962758660316467, "learning_rate": 5.899857362094763e-05, "loss": 1.6356, "step": 654 }, { "epoch": 0.04309423162327088, "grad_norm": 0.6261197328567505, "learning_rate": 5.8893225998807166e-05, "loss": 1.6317, "step": 655 }, { "epoch": 0.043160024343306416, "grad_norm": 0.605209469795227, "learning_rate": 5.878783759183442e-05, "loss": 1.6793, "step": 656 }, { "epoch": 0.04322581706334194, "grad_norm": 0.673530638217926, "learning_rate": 5.868240888334653e-05, "loss": 1.8195, "step": 657 }, { "epoch": 0.04329160978337747, "grad_norm": 0.595020055770874, "learning_rate": 5.857694035684545e-05, "loss": 1.6684, "step": 658 }, { "epoch": 0.043357402503412995, "grad_norm": 0.6050863265991211, "learning_rate": 5.847143249601574e-05, "loss": 1.6713, "step": 659 }, { "epoch": 0.04342319522344853, "grad_norm": 0.6145523190498352, "learning_rate": 5.8365885784722394e-05, "loss": 1.7704, "step": 660 }, { "epoch": 0.043488987943484055, "grad_norm": 0.5903642177581787, "learning_rate": 5.826030070700849e-05, "loss": 1.8083, "step": 661 }, { "epoch": 0.04355478066351958, "grad_norm": 0.6061384081840515, "learning_rate": 5.8154677747093134e-05, "loss": 1.713, "step": 662 }, { "epoch": 0.04362057338355511, "grad_norm": 0.6104039549827576, "learning_rate": 5.804901738936914e-05, "loss": 1.6264, "step": 663 }, { "epoch": 0.04368636610359064, "grad_norm": 0.6567908525466919, "learning_rate": 5.794332011840079e-05, "loss": 1.6822, "step": 664 }, { "epoch": 0.04375215882362617, "grad_norm": 0.6191847324371338, "learning_rate": 5.783758641892172e-05, "loss": 1.6488, "step": 665 }, { "epoch": 0.04381795154366169, "grad_norm": 0.648104727268219, "learning_rate": 5.773181677583257e-05, "loss": 1.6951, "step": 666 }, { "epoch": 0.04388374426369722, "grad_norm": 0.610373854637146, "learning_rate": 5.762601167419885e-05, "loss": 1.6071, "step": 667 }, { "epoch": 0.04394953698373275, "grad_norm": 0.6029400825500488, "learning_rate": 5.7520171599248704e-05, "loss": 1.5817, "step": 668 }, { "epoch": 0.04401532970376828, "grad_norm": 0.5969151854515076, "learning_rate": 5.74142970363706e-05, "loss": 1.478, "step": 669 }, { "epoch": 0.044081122423803805, "grad_norm": 0.6027969717979431, "learning_rate": 5.730838847111123e-05, "loss": 1.5602, "step": 670 }, { "epoch": 0.04414691514383933, "grad_norm": 0.600036084651947, "learning_rate": 5.7202446389173223e-05, "loss": 1.6526, "step": 671 }, { "epoch": 0.044212707863874864, "grad_norm": 0.648833692073822, "learning_rate": 5.709647127641286e-05, "loss": 1.6957, "step": 672 }, { "epoch": 0.04427850058391039, "grad_norm": 0.5996218323707581, "learning_rate": 5.6990463618837977e-05, "loss": 1.513, "step": 673 }, { "epoch": 0.04434429330394592, "grad_norm": 0.6365662217140198, "learning_rate": 5.688442390260559e-05, "loss": 1.5507, "step": 674 }, { "epoch": 0.04441008602398144, "grad_norm": 0.6217913627624512, "learning_rate": 5.6778352614019795e-05, "loss": 1.6172, "step": 675 }, { "epoch": 0.044475878744016976, "grad_norm": 0.6233986020088196, "learning_rate": 5.6672250239529465e-05, "loss": 1.3737, "step": 676 }, { "epoch": 0.0445416714640525, "grad_norm": 
0.6476162075996399, "learning_rate": 5.6566117265726006e-05, "loss": 1.5756, "step": 677 }, { "epoch": 0.04460746418408803, "grad_norm": 0.6581618189811707, "learning_rate": 5.645995417934119e-05, "loss": 1.5962, "step": 678 }, { "epoch": 0.044673256904123555, "grad_norm": 0.6630986928939819, "learning_rate": 5.635376146724489e-05, "loss": 1.6214, "step": 679 }, { "epoch": 0.04473904962415909, "grad_norm": 0.7406059503555298, "learning_rate": 5.624753961644281e-05, "loss": 1.6885, "step": 680 }, { "epoch": 0.044804842344194615, "grad_norm": 0.6429552435874939, "learning_rate": 5.614128911407431e-05, "loss": 1.4918, "step": 681 }, { "epoch": 0.04487063506423014, "grad_norm": 0.7244037389755249, "learning_rate": 5.603501044741013e-05, "loss": 1.5453, "step": 682 }, { "epoch": 0.044936427784265674, "grad_norm": 0.7735380530357361, "learning_rate": 5.5928704103850206e-05, "loss": 1.5844, "step": 683 }, { "epoch": 0.0450022205043012, "grad_norm": 0.701229989528656, "learning_rate": 5.582237057092137e-05, "loss": 1.551, "step": 684 }, { "epoch": 0.04506801322433673, "grad_norm": 0.7064613103866577, "learning_rate": 5.571601033627514e-05, "loss": 1.6503, "step": 685 }, { "epoch": 0.04513380594437225, "grad_norm": 0.7237836718559265, "learning_rate": 5.5609623887685535e-05, "loss": 1.668, "step": 686 }, { "epoch": 0.045199598664407786, "grad_norm": 0.7475508451461792, "learning_rate": 5.550321171304675e-05, "loss": 1.6663, "step": 687 }, { "epoch": 0.04526539138444331, "grad_norm": 0.7455587983131409, "learning_rate": 5.539677430037098e-05, "loss": 1.6195, "step": 688 }, { "epoch": 0.04533118410447884, "grad_norm": 0.6819461584091187, "learning_rate": 5.5290312137786146e-05, "loss": 1.4005, "step": 689 }, { "epoch": 0.045396976824514365, "grad_norm": 0.7876448631286621, "learning_rate": 5.518382571353369e-05, "loss": 1.6189, "step": 690 }, { "epoch": 0.0454627695445499, "grad_norm": 0.7936854958534241, "learning_rate": 5.507731551596632e-05, "loss": 1.7235, "step": 691 }, { "epoch": 0.045528562264585425, "grad_norm": 0.7383480668067932, "learning_rate": 5.4970782033545774e-05, "loss": 1.5017, "step": 692 }, { "epoch": 0.04559435498462095, "grad_norm": 0.7927383184432983, "learning_rate": 5.486422575484054e-05, "loss": 1.5818, "step": 693 }, { "epoch": 0.04566014770465648, "grad_norm": 0.8128315806388855, "learning_rate": 5.47576471685237e-05, "loss": 1.4506, "step": 694 }, { "epoch": 0.04572594042469201, "grad_norm": 0.7387059330940247, "learning_rate": 5.465104676337062e-05, "loss": 1.3782, "step": 695 }, { "epoch": 0.04579173314472754, "grad_norm": 0.974277913570404, "learning_rate": 5.4544425028256695e-05, "loss": 1.681, "step": 696 }, { "epoch": 0.04585752586476306, "grad_norm": 0.9682861566543579, "learning_rate": 5.443778245215519e-05, "loss": 1.6909, "step": 697 }, { "epoch": 0.04592331858479859, "grad_norm": 0.8716404438018799, "learning_rate": 5.433111952413495e-05, "loss": 1.4352, "step": 698 }, { "epoch": 0.04598911130483412, "grad_norm": 0.9771693348884583, "learning_rate": 5.42244367333581e-05, "loss": 1.4673, "step": 699 }, { "epoch": 0.04605490402486965, "grad_norm": 1.2287424802780151, "learning_rate": 5.411773456907792e-05, "loss": 1.2711, "step": 700 }, { "epoch": 0.046120696744905175, "grad_norm": 0.5442025065422058, "learning_rate": 5.401101352063647e-05, "loss": 1.7773, "step": 701 }, { "epoch": 0.0461864894649407, "grad_norm": 0.577033519744873, "learning_rate": 5.390427407746248e-05, "loss": 1.6329, "step": 702 }, { "epoch": 0.046252282184976234, "grad_norm": 
0.5880780816078186, "learning_rate": 5.379751672906902e-05, "loss": 1.6615, "step": 703 }, { "epoch": 0.04631807490501176, "grad_norm": 0.5862165689468384, "learning_rate": 5.369074196505125e-05, "loss": 1.7882, "step": 704 }, { "epoch": 0.04638386762504729, "grad_norm": 0.5397851467132568, "learning_rate": 5.3583950275084206e-05, "loss": 1.5977, "step": 705 }, { "epoch": 0.04644966034508281, "grad_norm": 0.581055760383606, "learning_rate": 5.347714214892058e-05, "loss": 1.5843, "step": 706 }, { "epoch": 0.046515453065118346, "grad_norm": 0.6341369152069092, "learning_rate": 5.33703180763884e-05, "loss": 1.698, "step": 707 }, { "epoch": 0.04658124578515387, "grad_norm": 0.562896192073822, "learning_rate": 5.3263478547388865e-05, "loss": 1.6773, "step": 708 }, { "epoch": 0.0466470385051894, "grad_norm": 0.5934199094772339, "learning_rate": 5.3156624051894e-05, "loss": 1.7178, "step": 709 }, { "epoch": 0.04671283122522493, "grad_norm": 0.5896904468536377, "learning_rate": 5.3049755079944527e-05, "loss": 1.5911, "step": 710 }, { "epoch": 0.04677862394526046, "grad_norm": 0.6300824284553528, "learning_rate": 5.2942872121647546e-05, "loss": 1.6214, "step": 711 }, { "epoch": 0.046844416665295985, "grad_norm": 0.6136653423309326, "learning_rate": 5.2835975667174254e-05, "loss": 1.7043, "step": 712 }, { "epoch": 0.04691020938533151, "grad_norm": 0.5764832496643066, "learning_rate": 5.272906620675779e-05, "loss": 1.5961, "step": 713 }, { "epoch": 0.046976002105367044, "grad_norm": 0.6364599466323853, "learning_rate": 5.2622144230690954e-05, "loss": 1.8236, "step": 714 }, { "epoch": 0.04704179482540257, "grad_norm": 0.6356618404388428, "learning_rate": 5.2515210229323866e-05, "loss": 1.6795, "step": 715 }, { "epoch": 0.0471075875454381, "grad_norm": 0.6253381371498108, "learning_rate": 5.240826469306187e-05, "loss": 1.5472, "step": 716 }, { "epoch": 0.04717338026547362, "grad_norm": 0.5967807173728943, "learning_rate": 5.230130811236316e-05, "loss": 1.7268, "step": 717 }, { "epoch": 0.047239172985509156, "grad_norm": 0.5806270241737366, "learning_rate": 5.2194340977736635e-05, "loss": 1.571, "step": 718 }, { "epoch": 0.04730496570554468, "grad_norm": 0.6197123527526855, "learning_rate": 5.208736377973954e-05, "loss": 1.6507, "step": 719 }, { "epoch": 0.04737075842558021, "grad_norm": 0.6148327589035034, "learning_rate": 5.19803770089753e-05, "loss": 1.8198, "step": 720 }, { "epoch": 0.047436551145615735, "grad_norm": 0.6454043984413147, "learning_rate": 5.187338115609123e-05, "loss": 1.4741, "step": 721 }, { "epoch": 0.04750234386565127, "grad_norm": 0.6259646415710449, "learning_rate": 5.176637671177631e-05, "loss": 1.6452, "step": 722 }, { "epoch": 0.047568136585686795, "grad_norm": 0.6418277025222778, "learning_rate": 5.1659364166758904e-05, "loss": 1.4635, "step": 723 }, { "epoch": 0.04763392930572232, "grad_norm": 0.6576802134513855, "learning_rate": 5.1552344011804554e-05, "loss": 1.5849, "step": 724 }, { "epoch": 0.04769972202575785, "grad_norm": 0.679136335849762, "learning_rate": 5.144531673771363e-05, "loss": 1.7297, "step": 725 }, { "epoch": 0.04776551474579338, "grad_norm": 0.6577818393707275, "learning_rate": 5.133828283531926e-05, "loss": 1.7771, "step": 726 }, { "epoch": 0.04783130746582891, "grad_norm": 0.6642506718635559, "learning_rate": 5.12312427954849e-05, "loss": 1.6831, "step": 727 }, { "epoch": 0.04789710018586443, "grad_norm": 0.6602113842964172, "learning_rate": 5.112419710910213e-05, "loss": 1.6407, "step": 728 }, { "epoch": 0.04796289290589996, "grad_norm": 
0.6676844954490662, "learning_rate": 5.101714626708849e-05, "loss": 1.5256, "step": 729 }, { "epoch": 0.04802868562593549, "grad_norm": 0.6657158136367798, "learning_rate": 5.091009076038514e-05, "loss": 1.6361, "step": 730 }, { "epoch": 0.04809447834597102, "grad_norm": 0.6682079434394836, "learning_rate": 5.080303107995461e-05, "loss": 1.4751, "step": 731 }, { "epoch": 0.048160271066006545, "grad_norm": 0.68694007396698, "learning_rate": 5.0695967716778606e-05, "loss": 1.6417, "step": 732 }, { "epoch": 0.04822606378604207, "grad_norm": 0.726314127445221, "learning_rate": 5.058890116185566e-05, "loss": 1.5361, "step": 733 }, { "epoch": 0.048291856506077604, "grad_norm": 0.6680204272270203, "learning_rate": 5.048183190619904e-05, "loss": 1.4233, "step": 734 }, { "epoch": 0.04835764922611313, "grad_norm": 0.686355710029602, "learning_rate": 5.0374760440834335e-05, "loss": 1.4506, "step": 735 }, { "epoch": 0.04842344194614866, "grad_norm": 0.7238395810127258, "learning_rate": 5.026768725679726e-05, "loss": 1.5804, "step": 736 }, { "epoch": 0.04848923466618418, "grad_norm": 0.7959031462669373, "learning_rate": 5.0160612845131414e-05, "loss": 1.761, "step": 737 }, { "epoch": 0.04855502738621972, "grad_norm": 0.7459606528282166, "learning_rate": 5.005353769688611e-05, "loss": 1.4302, "step": 738 }, { "epoch": 0.04862082010625524, "grad_norm": 0.7083699703216553, "learning_rate": 4.994646230311391e-05, "loss": 1.5262, "step": 739 }, { "epoch": 0.04868661282629077, "grad_norm": 0.7352794408798218, "learning_rate": 4.9839387154868584e-05, "loss": 1.5299, "step": 740 }, { "epoch": 0.0487524055463263, "grad_norm": 0.7063877582550049, "learning_rate": 4.973231274320276e-05, "loss": 1.6491, "step": 741 }, { "epoch": 0.04881819826636183, "grad_norm": 0.7635167837142944, "learning_rate": 4.962523955916569e-05, "loss": 1.677, "step": 742 }, { "epoch": 0.048883990986397355, "grad_norm": 0.756692111492157, "learning_rate": 4.951816809380097e-05, "loss": 1.4049, "step": 743 }, { "epoch": 0.04894978370643288, "grad_norm": 0.7604605555534363, "learning_rate": 4.9411098838144346e-05, "loss": 1.6448, "step": 744 }, { "epoch": 0.049015576426468414, "grad_norm": 0.8182808756828308, "learning_rate": 4.9304032283221405e-05, "loss": 1.5077, "step": 745 }, { "epoch": 0.04908136914650394, "grad_norm": 0.7334157824516296, "learning_rate": 4.919696892004539e-05, "loss": 1.4763, "step": 746 }, { "epoch": 0.04914716186653947, "grad_norm": 0.8388804197311401, "learning_rate": 4.908990923961488e-05, "loss": 1.4618, "step": 747 }, { "epoch": 0.04921295458657499, "grad_norm": 0.8400937914848328, "learning_rate": 4.898285373291152e-05, "loss": 1.3205, "step": 748 }, { "epoch": 0.049278747306610526, "grad_norm": 0.9195184111595154, "learning_rate": 4.887580289089787e-05, "loss": 1.3075, "step": 749 }, { "epoch": 0.04934454002664605, "grad_norm": 1.210403561592102, "learning_rate": 4.876875720451511e-05, "loss": 1.3114, "step": 750 }, { "epoch": 0.04941033274668158, "grad_norm": 0.5409945845603943, "learning_rate": 4.866171716468074e-05, "loss": 1.5787, "step": 751 }, { "epoch": 0.049476125466717105, "grad_norm": 0.5661841034889221, "learning_rate": 4.855468326228638e-05, "loss": 1.5597, "step": 752 }, { "epoch": 0.04954191818675264, "grad_norm": 0.5858348608016968, "learning_rate": 4.8447655988195464e-05, "loss": 1.6057, "step": 753 }, { "epoch": 0.049607710906788165, "grad_norm": 0.5190932154655457, "learning_rate": 4.834063583324111e-05, "loss": 1.5158, "step": 754 }, { "epoch": 0.04967350362682369, "grad_norm": 
0.6089287996292114, "learning_rate": 4.8233623288223704e-05, "loss": 1.7857, "step": 755 }, { "epoch": 0.04973929634685922, "grad_norm": 0.6427363753318787, "learning_rate": 4.8126618843908775e-05, "loss": 1.8208, "step": 756 }, { "epoch": 0.04980508906689475, "grad_norm": 0.5550252795219421, "learning_rate": 4.801962299102471e-05, "loss": 1.6078, "step": 757 }, { "epoch": 0.04987088178693028, "grad_norm": 0.6121551990509033, "learning_rate": 4.7912636220260473e-05, "loss": 1.725, "step": 758 }, { "epoch": 0.0499366745069658, "grad_norm": 0.6313748955726624, "learning_rate": 4.780565902226338e-05, "loss": 1.8692, "step": 759 }, { "epoch": 0.05000246722700133, "grad_norm": 0.5803863406181335, "learning_rate": 4.7698691887636854e-05, "loss": 1.5714, "step": 760 }, { "epoch": 0.05006825994703686, "grad_norm": 0.5760961174964905, "learning_rate": 4.759173530693814e-05, "loss": 1.6356, "step": 761 }, { "epoch": 0.05013405266707239, "grad_norm": 0.5896910429000854, "learning_rate": 4.748478977067614e-05, "loss": 1.5632, "step": 762 }, { "epoch": 0.050199845387107915, "grad_norm": 0.5783917903900146, "learning_rate": 4.737785576930908e-05, "loss": 1.531, "step": 763 }, { "epoch": 0.05026563810714344, "grad_norm": 0.6067524552345276, "learning_rate": 4.727093379324222e-05, "loss": 1.6955, "step": 764 }, { "epoch": 0.050331430827178975, "grad_norm": 0.5850797295570374, "learning_rate": 4.716402433282575e-05, "loss": 1.6401, "step": 765 }, { "epoch": 0.0503972235472145, "grad_norm": 0.6247797012329102, "learning_rate": 4.705712787835247e-05, "loss": 1.7098, "step": 766 }, { "epoch": 0.05046301626725003, "grad_norm": 0.5667417049407959, "learning_rate": 4.695024492005548e-05, "loss": 1.5379, "step": 767 }, { "epoch": 0.05052880898728556, "grad_norm": 0.5960890650749207, "learning_rate": 4.684337594810602e-05, "loss": 1.7614, "step": 768 }, { "epoch": 0.05059460170732109, "grad_norm": 0.6005357503890991, "learning_rate": 4.673652145261116e-05, "loss": 1.5532, "step": 769 }, { "epoch": 0.05066039442735661, "grad_norm": 0.6175205111503601, "learning_rate": 4.6629681923611603e-05, "loss": 1.6746, "step": 770 }, { "epoch": 0.05072618714739214, "grad_norm": 0.6387405395507812, "learning_rate": 4.652285785107943e-05, "loss": 1.6399, "step": 771 }, { "epoch": 0.05079197986742767, "grad_norm": 0.5912647247314453, "learning_rate": 4.64160497249158e-05, "loss": 1.7165, "step": 772 }, { "epoch": 0.0508577725874632, "grad_norm": 0.6159881353378296, "learning_rate": 4.630925803494877e-05, "loss": 1.6739, "step": 773 }, { "epoch": 0.050923565307498725, "grad_norm": 0.6138644218444824, "learning_rate": 4.6202483270931e-05, "loss": 1.535, "step": 774 }, { "epoch": 0.05098935802753425, "grad_norm": 0.6410723924636841, "learning_rate": 4.6095725922537533e-05, "loss": 1.6135, "step": 775 }, { "epoch": 0.051055150747569784, "grad_norm": 0.6804289817810059, "learning_rate": 4.598898647936354e-05, "loss": 1.639, "step": 776 }, { "epoch": 0.05112094346760531, "grad_norm": 0.6560742259025574, "learning_rate": 4.588226543092209e-05, "loss": 1.6589, "step": 777 }, { "epoch": 0.05118673618764084, "grad_norm": 0.6562034487724304, "learning_rate": 4.5775563266641894e-05, "loss": 1.5987, "step": 778 }, { "epoch": 0.05125252890767636, "grad_norm": 0.7900398969650269, "learning_rate": 4.566888047586507e-05, "loss": 1.8027, "step": 779 }, { "epoch": 0.051318321627711896, "grad_norm": 0.6825142502784729, "learning_rate": 4.556221754784482e-05, "loss": 1.5588, "step": 780 }, { "epoch": 0.05138411434774742, "grad_norm": 
0.6759592294692993, "learning_rate": 4.545557497174331e-05, "loss": 1.535, "step": 781 }, { "epoch": 0.05144990706778295, "grad_norm": 0.7403655052185059, "learning_rate": 4.5348953236629395e-05, "loss": 1.6259, "step": 782 }, { "epoch": 0.051515699787818475, "grad_norm": 0.711732804775238, "learning_rate": 4.52423528314763e-05, "loss": 1.5692, "step": 783 }, { "epoch": 0.05158149250785401, "grad_norm": 0.6846985220909119, "learning_rate": 4.5135774245159454e-05, "loss": 1.5152, "step": 784 }, { "epoch": 0.051647285227889535, "grad_norm": 0.6845923066139221, "learning_rate": 4.502921796645424e-05, "loss": 1.5296, "step": 785 }, { "epoch": 0.05171307794792506, "grad_norm": 0.7074812054634094, "learning_rate": 4.492268448403369e-05, "loss": 1.4728, "step": 786 }, { "epoch": 0.05177887066796059, "grad_norm": 1.4424465894699097, "learning_rate": 4.4816174286466314e-05, "loss": 1.4922, "step": 787 }, { "epoch": 0.05184466338799612, "grad_norm": 0.7188639640808105, "learning_rate": 4.4709687862213866e-05, "loss": 1.6399, "step": 788 }, { "epoch": 0.05191045610803165, "grad_norm": 0.7116633057594299, "learning_rate": 4.4603225699629037e-05, "loss": 1.586, "step": 789 }, { "epoch": 0.05197624882806717, "grad_norm": 0.728759229183197, "learning_rate": 4.4496788286953266e-05, "loss": 1.4238, "step": 790 }, { "epoch": 0.0520420415481027, "grad_norm": 0.7237952947616577, "learning_rate": 4.439037611231448e-05, "loss": 1.514, "step": 791 }, { "epoch": 0.05210783426813823, "grad_norm": 0.7653865218162537, "learning_rate": 4.4283989663724875e-05, "loss": 1.6958, "step": 792 }, { "epoch": 0.05217362698817376, "grad_norm": 0.7304019331932068, "learning_rate": 4.4177629429078635e-05, "loss": 1.5366, "step": 793 }, { "epoch": 0.052239419708209285, "grad_norm": 0.7114179730415344, "learning_rate": 4.407129589614979e-05, "loss": 1.4206, "step": 794 }, { "epoch": 0.05230521242824481, "grad_norm": 0.7419894337654114, "learning_rate": 4.396498955258989e-05, "loss": 1.4686, "step": 795 }, { "epoch": 0.052371005148280345, "grad_norm": 0.8355379700660706, "learning_rate": 4.385871088592571e-05, "loss": 1.63, "step": 796 }, { "epoch": 0.05243679786831587, "grad_norm": 0.8412326574325562, "learning_rate": 4.3752460383557195e-05, "loss": 1.6383, "step": 797 }, { "epoch": 0.0525025905883514, "grad_norm": 0.8090490102767944, "learning_rate": 4.3646238532755114e-05, "loss": 1.407, "step": 798 }, { "epoch": 0.05256838330838693, "grad_norm": 0.8998046517372131, "learning_rate": 4.3540045820658804e-05, "loss": 1.625, "step": 799 }, { "epoch": 0.05263417602842246, "grad_norm": 1.3124433755874634, "learning_rate": 4.3433882734274e-05, "loss": 1.5433, "step": 800 }, { "epoch": 0.05269996874845798, "grad_norm": 0.5437705516815186, "learning_rate": 4.332774976047055e-05, "loss": 1.7119, "step": 801 }, { "epoch": 0.05276576146849351, "grad_norm": 0.5468183159828186, "learning_rate": 4.322164738598022e-05, "loss": 1.4418, "step": 802 }, { "epoch": 0.05283155418852904, "grad_norm": 0.5732606053352356, "learning_rate": 4.311557609739442e-05, "loss": 1.7286, "step": 803 }, { "epoch": 0.05289734690856457, "grad_norm": 0.5762453675270081, "learning_rate": 4.300953638116204e-05, "loss": 1.7308, "step": 804 }, { "epoch": 0.052963139628600095, "grad_norm": 0.5938736796379089, "learning_rate": 4.290352872358714e-05, "loss": 1.7055, "step": 805 }, { "epoch": 0.05302893234863562, "grad_norm": 0.5911692976951599, "learning_rate": 4.27975536108268e-05, "loss": 1.631, "step": 806 }, { "epoch": 0.053094725068671154, "grad_norm": 
0.5975512266159058, "learning_rate": 4.2691611528888775e-05, "loss": 1.6773, "step": 807 }, { "epoch": 0.05316051778870668, "grad_norm": 0.5482115745544434, "learning_rate": 4.258570296362942e-05, "loss": 1.5443, "step": 808 }, { "epoch": 0.05322631050874221, "grad_norm": 0.6074912548065186, "learning_rate": 4.24798284007513e-05, "loss": 1.6919, "step": 809 }, { "epoch": 0.05329210322877773, "grad_norm": 0.5921036601066589, "learning_rate": 4.2373988325801145e-05, "loss": 1.6611, "step": 810 }, { "epoch": 0.053357895948813266, "grad_norm": 0.641273558139801, "learning_rate": 4.2268183224167456e-05, "loss": 1.8859, "step": 811 }, { "epoch": 0.05342368866884879, "grad_norm": 0.610242486000061, "learning_rate": 4.216241358107831e-05, "loss": 1.5211, "step": 812 }, { "epoch": 0.05348948138888432, "grad_norm": 0.6088224053382874, "learning_rate": 4.205667988159921e-05, "loss": 1.6132, "step": 813 }, { "epoch": 0.053555274108919845, "grad_norm": 0.5857619643211365, "learning_rate": 4.195098261063087e-05, "loss": 1.5505, "step": 814 }, { "epoch": 0.05362106682895538, "grad_norm": 0.6356807351112366, "learning_rate": 4.1845322252906864e-05, "loss": 1.6472, "step": 815 }, { "epoch": 0.053686859548990905, "grad_norm": 0.5856146216392517, "learning_rate": 4.173969929299151e-05, "loss": 1.7503, "step": 816 }, { "epoch": 0.05375265226902643, "grad_norm": 0.60245680809021, "learning_rate": 4.1634114215277625e-05, "loss": 1.5542, "step": 817 }, { "epoch": 0.05381844498906196, "grad_norm": 0.6490529179573059, "learning_rate": 4.152856750398426e-05, "loss": 1.5974, "step": 818 }, { "epoch": 0.05388423770909749, "grad_norm": 0.6497132778167725, "learning_rate": 4.1423059643154564e-05, "loss": 1.603, "step": 819 }, { "epoch": 0.05395003042913302, "grad_norm": 0.5978454351425171, "learning_rate": 4.131759111665349e-05, "loss": 1.4747, "step": 820 }, { "epoch": 0.05401582314916854, "grad_norm": 0.6695238947868347, "learning_rate": 4.1212162408165595e-05, "loss": 1.6682, "step": 821 }, { "epoch": 0.05408161586920407, "grad_norm": 0.6585442423820496, "learning_rate": 4.110677400119285e-05, "loss": 1.72, "step": 822 }, { "epoch": 0.0541474085892396, "grad_norm": 0.6088001132011414, "learning_rate": 4.100142637905238e-05, "loss": 1.474, "step": 823 }, { "epoch": 0.05421320130927513, "grad_norm": 0.6855148673057556, "learning_rate": 4.0896120024874286e-05, "loss": 1.6749, "step": 824 }, { "epoch": 0.054278994029310655, "grad_norm": 0.6727811694145203, "learning_rate": 4.079085542159938e-05, "loss": 1.7432, "step": 825 }, { "epoch": 0.05434478674934618, "grad_norm": 0.6346832513809204, "learning_rate": 4.068563305197706e-05, "loss": 1.5826, "step": 826 }, { "epoch": 0.054410579469381715, "grad_norm": 0.6359438300132751, "learning_rate": 4.0580453398563e-05, "loss": 1.5543, "step": 827 }, { "epoch": 0.05447637218941724, "grad_norm": 0.6643136739730835, "learning_rate": 4.047531694371695e-05, "loss": 1.5587, "step": 828 }, { "epoch": 0.05454216490945277, "grad_norm": 0.6467865109443665, "learning_rate": 4.037022416960058e-05, "loss": 1.7538, "step": 829 }, { "epoch": 0.0546079576294883, "grad_norm": 0.6873615980148315, "learning_rate": 4.0265175558175265e-05, "loss": 1.5784, "step": 830 }, { "epoch": 0.05467375034952383, "grad_norm": 0.6892521977424622, "learning_rate": 4.01601715911998e-05, "loss": 1.5759, "step": 831 }, { "epoch": 0.05473954306955935, "grad_norm": 0.6923861503601074, "learning_rate": 4.005521275022826e-05, "loss": 1.6928, "step": 832 }, { "epoch": 0.05480533578959488, "grad_norm": 
0.9242671132087708, "learning_rate": 3.9950299516607766e-05, "loss": 1.7272, "step": 833 }, { "epoch": 0.05487112850963041, "grad_norm": 0.7108779549598694, "learning_rate": 3.9845432371476264e-05, "loss": 1.632, "step": 834 }, { "epoch": 0.05493692122966594, "grad_norm": 0.7247679233551025, "learning_rate": 3.9740611795760376e-05, "loss": 1.5377, "step": 835 }, { "epoch": 0.055002713949701465, "grad_norm": 0.7193149328231812, "learning_rate": 3.9635838270173107e-05, "loss": 1.431, "step": 836 }, { "epoch": 0.05506850666973699, "grad_norm": 0.7430593967437744, "learning_rate": 3.9531112275211736e-05, "loss": 1.5027, "step": 837 }, { "epoch": 0.055134299389772524, "grad_norm": 0.7426053285598755, "learning_rate": 3.9426434291155526e-05, "loss": 1.6552, "step": 838 }, { "epoch": 0.05520009210980805, "grad_norm": 0.7447395324707031, "learning_rate": 3.9321804798063565e-05, "loss": 1.5632, "step": 839 }, { "epoch": 0.05526588482984358, "grad_norm": 0.7917671203613281, "learning_rate": 3.921722427577257e-05, "loss": 1.5141, "step": 840 }, { "epoch": 0.0553316775498791, "grad_norm": 0.815054178237915, "learning_rate": 3.9112693203894664e-05, "loss": 1.5968, "step": 841 }, { "epoch": 0.055397470269914637, "grad_norm": 0.8161969184875488, "learning_rate": 3.900821206181521e-05, "loss": 1.487, "step": 842 }, { "epoch": 0.05546326298995016, "grad_norm": 0.7944288849830627, "learning_rate": 3.890378132869059e-05, "loss": 1.6617, "step": 843 }, { "epoch": 0.05552905570998569, "grad_norm": 0.801561176776886, "learning_rate": 3.879940148344595e-05, "loss": 1.4835, "step": 844 }, { "epoch": 0.055594848430021215, "grad_norm": 0.8297501802444458, "learning_rate": 3.8695073004773106e-05, "loss": 1.3569, "step": 845 }, { "epoch": 0.05566064115005675, "grad_norm": 0.8283421993255615, "learning_rate": 3.859079637112833e-05, "loss": 1.4448, "step": 846 }, { "epoch": 0.055726433870092275, "grad_norm": 0.862443745136261, "learning_rate": 3.84865720607301e-05, "loss": 1.558, "step": 847 }, { "epoch": 0.0557922265901278, "grad_norm": 0.8980772495269775, "learning_rate": 3.838240055155692e-05, "loss": 1.4493, "step": 848 }, { "epoch": 0.05585801931016333, "grad_norm": 0.9152320623397827, "learning_rate": 3.827828232134519e-05, "loss": 1.2151, "step": 849 }, { "epoch": 0.05592381203019886, "grad_norm": 1.169055700302124, "learning_rate": 3.8174217847586904e-05, "loss": 1.2846, "step": 850 }, { "epoch": 0.05598960475023439, "grad_norm": 0.5100705623626709, "learning_rate": 3.8070207607527584e-05, "loss": 1.5764, "step": 851 }, { "epoch": 0.05605539747026991, "grad_norm": 0.5505961179733276, "learning_rate": 3.796625207816401e-05, "loss": 1.7925, "step": 852 }, { "epoch": 0.05612119019030544, "grad_norm": 0.6031649708747864, "learning_rate": 3.7862351736242066e-05, "loss": 1.7848, "step": 853 }, { "epoch": 0.05618698291034097, "grad_norm": 0.6231903433799744, "learning_rate": 3.775850705825454e-05, "loss": 1.7385, "step": 854 }, { "epoch": 0.0562527756303765, "grad_norm": 0.5796992778778076, "learning_rate": 3.765471852043892e-05, "loss": 1.6627, "step": 855 }, { "epoch": 0.056318568350412025, "grad_norm": 0.5962229371070862, "learning_rate": 3.755098659877527e-05, "loss": 1.7171, "step": 856 }, { "epoch": 0.05638436107044756, "grad_norm": 0.6059638857841492, "learning_rate": 3.7447311768983964e-05, "loss": 1.6715, "step": 857 }, { "epoch": 0.056450153790483085, "grad_norm": 0.6489062905311584, "learning_rate": 3.7343694506523605e-05, "loss": 1.7469, "step": 858 }, { "epoch": 0.05651594651051861, "grad_norm": 
0.6001795530319214, "learning_rate": 3.7240135286588765e-05, "loss": 1.6136, "step": 859 }, { "epoch": 0.05658173923055414, "grad_norm": 0.6038196086883545, "learning_rate": 3.713663458410779e-05, "loss": 1.6115, "step": 860 }, { "epoch": 0.05664753195058967, "grad_norm": 0.584757924079895, "learning_rate": 3.703319287374069e-05, "loss": 1.5293, "step": 861 }, { "epoch": 0.0567133246706252, "grad_norm": 0.6052143573760986, "learning_rate": 3.692981062987698e-05, "loss": 1.8357, "step": 862 }, { "epoch": 0.05677911739066072, "grad_norm": 0.6260353326797485, "learning_rate": 3.682648832663339e-05, "loss": 1.6054, "step": 863 }, { "epoch": 0.05684491011069625, "grad_norm": 0.643218457698822, "learning_rate": 3.67232264378518e-05, "loss": 1.7711, "step": 864 }, { "epoch": 0.05691070283073178, "grad_norm": 0.577084481716156, "learning_rate": 3.662002543709702e-05, "loss": 1.582, "step": 865 }, { "epoch": 0.05697649555076731, "grad_norm": 0.7629356384277344, "learning_rate": 3.6516885797654594e-05, "loss": 1.6112, "step": 866 }, { "epoch": 0.057042288270802835, "grad_norm": 0.6926255226135254, "learning_rate": 3.641380799252869e-05, "loss": 1.537, "step": 867 }, { "epoch": 0.05710808099083836, "grad_norm": 0.6118739247322083, "learning_rate": 3.6310792494439894e-05, "loss": 1.6167, "step": 868 }, { "epoch": 0.057173873710873895, "grad_norm": 0.6357191205024719, "learning_rate": 3.620783977582305e-05, "loss": 1.4908, "step": 869 }, { "epoch": 0.05723966643090942, "grad_norm": 0.6403725743293762, "learning_rate": 3.61049503088251e-05, "loss": 1.469, "step": 870 }, { "epoch": 0.05730545915094495, "grad_norm": 0.6135770678520203, "learning_rate": 3.6002124565302875e-05, "loss": 1.4758, "step": 871 }, { "epoch": 0.05737125187098047, "grad_norm": 0.6306059956550598, "learning_rate": 3.5899363016821e-05, "loss": 1.5523, "step": 872 }, { "epoch": 0.05743704459101601, "grad_norm": 0.6534183025360107, "learning_rate": 3.579666613464968e-05, "loss": 1.5653, "step": 873 }, { "epoch": 0.05750283731105153, "grad_norm": 0.6130310893058777, "learning_rate": 3.56940343897626e-05, "loss": 1.547, "step": 874 }, { "epoch": 0.05756863003108706, "grad_norm": 0.6713092923164368, "learning_rate": 3.559146825283465e-05, "loss": 1.6401, "step": 875 }, { "epoch": 0.057634422751122585, "grad_norm": 0.6866581439971924, "learning_rate": 3.5488968194239926e-05, "loss": 1.6317, "step": 876 }, { "epoch": 0.05770021547115812, "grad_norm": 0.6544456481933594, "learning_rate": 3.538653468404936e-05, "loss": 1.5999, "step": 877 }, { "epoch": 0.057766008191193645, "grad_norm": 0.6867201328277588, "learning_rate": 3.528416819202881e-05, "loss": 1.6641, "step": 878 }, { "epoch": 0.05783180091122917, "grad_norm": 0.7083088159561157, "learning_rate": 3.5181869187636735e-05, "loss": 1.499, "step": 879 }, { "epoch": 0.0578975936312647, "grad_norm": 0.7546277046203613, "learning_rate": 3.5079638140022094e-05, "loss": 1.6342, "step": 880 }, { "epoch": 0.05796338635130023, "grad_norm": 0.7427401542663574, "learning_rate": 3.497747551802221e-05, "loss": 1.7578, "step": 881 }, { "epoch": 0.05802917907133576, "grad_norm": 0.7476483583450317, "learning_rate": 3.487538179016054e-05, "loss": 1.5637, "step": 882 }, { "epoch": 0.05809497179137128, "grad_norm": 0.7196775078773499, "learning_rate": 3.4773357424644685e-05, "loss": 1.5247, "step": 883 }, { "epoch": 0.05816076451140681, "grad_norm": 0.7455043792724609, "learning_rate": 3.467140288936407e-05, "loss": 1.5444, "step": 884 }, { "epoch": 0.05822655723144234, "grad_norm": 0.7095162868499756, 
"learning_rate": 3.456951865188791e-05, "loss": 1.6029, "step": 885 }, { "epoch": 0.05829234995147787, "grad_norm": 0.7810092568397522, "learning_rate": 3.446770517946303e-05, "loss": 1.5827, "step": 886 }, { "epoch": 0.058358142671513395, "grad_norm": 0.7805618643760681, "learning_rate": 3.43659629390117e-05, "loss": 1.562, "step": 887 }, { "epoch": 0.05842393539154893, "grad_norm": 0.7464081048965454, "learning_rate": 3.426429239712952e-05, "loss": 1.646, "step": 888 }, { "epoch": 0.058489728111584455, "grad_norm": 0.7130300998687744, "learning_rate": 3.4162694020083294e-05, "loss": 1.3213, "step": 889 }, { "epoch": 0.05855552083161998, "grad_norm": 0.7412570118904114, "learning_rate": 3.406116827380889e-05, "loss": 1.6022, "step": 890 }, { "epoch": 0.05862131355165551, "grad_norm": 0.7365239262580872, "learning_rate": 3.395971562390905e-05, "loss": 1.5959, "step": 891 }, { "epoch": 0.05868710627169104, "grad_norm": 0.8533955812454224, "learning_rate": 3.385833653565132e-05, "loss": 1.766, "step": 892 }, { "epoch": 0.05875289899172657, "grad_norm": 0.8184581995010376, "learning_rate": 3.375703147396583e-05, "loss": 1.5165, "step": 893 }, { "epoch": 0.05881869171176209, "grad_norm": 0.7530040740966797, "learning_rate": 3.365580090344329e-05, "loss": 1.4796, "step": 894 }, { "epoch": 0.05888448443179762, "grad_norm": 0.8272554278373718, "learning_rate": 3.3554645288332775e-05, "loss": 1.3817, "step": 895 }, { "epoch": 0.05895027715183315, "grad_norm": 0.8762139678001404, "learning_rate": 3.345356509253959e-05, "loss": 1.4379, "step": 896 }, { "epoch": 0.05901606987186868, "grad_norm": 0.9064789414405823, "learning_rate": 3.335256077962317e-05, "loss": 1.5902, "step": 897 }, { "epoch": 0.059081862591904205, "grad_norm": 0.8573328852653503, "learning_rate": 3.325163281279494e-05, "loss": 1.4975, "step": 898 }, { "epoch": 0.05914765531193973, "grad_norm": 0.9656907320022583, "learning_rate": 3.315078165491622e-05, "loss": 1.5053, "step": 899 }, { "epoch": 0.059213448031975265, "grad_norm": 1.2840112447738647, "learning_rate": 3.305000776849606e-05, "loss": 1.2657, "step": 900 }, { "epoch": 0.05927924075201079, "grad_norm": 0.5296594500541687, "learning_rate": 3.294931161568914e-05, "loss": 1.7877, "step": 901 }, { "epoch": 0.05934503347204632, "grad_norm": 0.5523350834846497, "learning_rate": 3.2848693658293675e-05, "loss": 1.6165, "step": 902 }, { "epoch": 0.05941082619208184, "grad_norm": 0.5559424161911011, "learning_rate": 3.274815435774921e-05, "loss": 1.6415, "step": 903 }, { "epoch": 0.05947661891211738, "grad_norm": 0.6036515831947327, "learning_rate": 3.264769417513463e-05, "loss": 1.6959, "step": 904 }, { "epoch": 0.0595424116321529, "grad_norm": 0.5790659189224243, "learning_rate": 3.254731357116597e-05, "loss": 1.6368, "step": 905 }, { "epoch": 0.05960820435218843, "grad_norm": 0.5860310792922974, "learning_rate": 3.244701300619427e-05, "loss": 1.5645, "step": 906 }, { "epoch": 0.059673997072223955, "grad_norm": 0.5953836441040039, "learning_rate": 3.2346792940203554e-05, "loss": 1.5427, "step": 907 }, { "epoch": 0.05973978979225949, "grad_norm": 0.574989378452301, "learning_rate": 3.224665383280867e-05, "loss": 1.7678, "step": 908 }, { "epoch": 0.059805582512295015, "grad_norm": 0.5947942733764648, "learning_rate": 3.214659614325313e-05, "loss": 1.5293, "step": 909 }, { "epoch": 0.05987137523233054, "grad_norm": 0.5816536545753479, "learning_rate": 3.2046620330407116e-05, "loss": 1.4343, "step": 910 }, { "epoch": 0.05993716795236607, "grad_norm": 0.5763691663742065, 
"learning_rate": 3.194672685276532e-05, "loss": 1.6881, "step": 911 }, { "epoch": 0.0600029606724016, "grad_norm": 0.629294753074646, "learning_rate": 3.1846916168444806e-05, "loss": 1.7855, "step": 912 }, { "epoch": 0.06006875339243713, "grad_norm": 0.6405010223388672, "learning_rate": 3.174718873518298e-05, "loss": 1.6608, "step": 913 }, { "epoch": 0.06013454611247265, "grad_norm": 0.5852246284484863, "learning_rate": 3.16475450103354e-05, "loss": 1.6286, "step": 914 }, { "epoch": 0.06020033883250818, "grad_norm": 0.6519684195518494, "learning_rate": 3.154798545087379e-05, "loss": 1.6609, "step": 915 }, { "epoch": 0.06026613155254371, "grad_norm": 0.6138909459114075, "learning_rate": 3.144851051338385e-05, "loss": 1.7577, "step": 916 }, { "epoch": 0.06033192427257924, "grad_norm": 0.5902788639068604, "learning_rate": 3.1349120654063225e-05, "loss": 1.622, "step": 917 }, { "epoch": 0.060397716992614765, "grad_norm": 0.6420292854309082, "learning_rate": 3.124981632871937e-05, "loss": 1.6161, "step": 918 }, { "epoch": 0.0604635097126503, "grad_norm": 0.6139567494392395, "learning_rate": 3.115059799276748e-05, "loss": 1.682, "step": 919 }, { "epoch": 0.060529302432685825, "grad_norm": 0.6504656672477722, "learning_rate": 3.1051466101228385e-05, "loss": 1.5961, "step": 920 }, { "epoch": 0.06059509515272135, "grad_norm": 0.6670247912406921, "learning_rate": 3.09524211087265e-05, "loss": 1.6288, "step": 921 }, { "epoch": 0.06066088787275688, "grad_norm": 0.6252246499061584, "learning_rate": 3.08534634694877e-05, "loss": 1.527, "step": 922 }, { "epoch": 0.06072668059279241, "grad_norm": 0.6155936121940613, "learning_rate": 3.0754593637337276e-05, "loss": 1.5781, "step": 923 }, { "epoch": 0.06079247331282794, "grad_norm": 0.6207473278045654, "learning_rate": 3.065581206569782e-05, "loss": 1.3617, "step": 924 }, { "epoch": 0.06085826603286346, "grad_norm": 0.6116368174552917, "learning_rate": 3.055711920758709e-05, "loss": 1.6183, "step": 925 }, { "epoch": 0.06092405875289899, "grad_norm": 0.646839439868927, "learning_rate": 3.0458515515616115e-05, "loss": 1.5567, "step": 926 }, { "epoch": 0.06098985147293452, "grad_norm": 0.6746354103088379, "learning_rate": 3.036000144198693e-05, "loss": 1.5467, "step": 927 }, { "epoch": 0.06105564419297005, "grad_norm": 0.639434278011322, "learning_rate": 3.0261577438490585e-05, "loss": 1.5425, "step": 928 }, { "epoch": 0.061121436913005575, "grad_norm": 0.685247540473938, "learning_rate": 3.0163243956505095e-05, "loss": 1.6157, "step": 929 }, { "epoch": 0.0611872296330411, "grad_norm": 0.6764075756072998, "learning_rate": 3.006500144699328e-05, "loss": 1.5882, "step": 930 }, { "epoch": 0.061253022353076635, "grad_norm": 0.6915985345840454, "learning_rate": 2.99668503605008e-05, "loss": 1.51, "step": 931 }, { "epoch": 0.06131881507311216, "grad_norm": 0.74942946434021, "learning_rate": 2.986879114715403e-05, "loss": 1.4684, "step": 932 }, { "epoch": 0.06138460779314769, "grad_norm": 0.6873716711997986, "learning_rate": 2.977082425665802e-05, "loss": 1.6268, "step": 933 }, { "epoch": 0.06145040051318321, "grad_norm": 0.7743644714355469, "learning_rate": 2.967295013829442e-05, "loss": 1.564, "step": 934 }, { "epoch": 0.06151619323321875, "grad_norm": 0.702805757522583, "learning_rate": 2.95751692409194e-05, "loss": 1.7741, "step": 935 }, { "epoch": 0.06158198595325427, "grad_norm": 0.7666968107223511, "learning_rate": 2.947748201296163e-05, "loss": 1.6298, "step": 936 }, { "epoch": 0.0616477786732898, "grad_norm": 0.700221061706543, "learning_rate": 
2.9379888902420215e-05, "loss": 1.5275, "step": 937 }, { "epoch": 0.061713571393325325, "grad_norm": 0.7477214932441711, "learning_rate": 2.9282390356862606e-05, "loss": 1.5412, "step": 938 }, { "epoch": 0.06177936411336086, "grad_norm": 0.7472383975982666, "learning_rate": 2.9184986823422623e-05, "loss": 1.547, "step": 939 }, { "epoch": 0.061845156833396385, "grad_norm": 0.7094493508338928, "learning_rate": 2.9087678748798297e-05, "loss": 1.4617, "step": 940 }, { "epoch": 0.06191094955343191, "grad_norm": 0.7252383828163147, "learning_rate": 2.899046657924992e-05, "loss": 1.3699, "step": 941 }, { "epoch": 0.06197674227346744, "grad_norm": 0.7631134390830994, "learning_rate": 2.8893350760597924e-05, "loss": 1.567, "step": 942 }, { "epoch": 0.06204253499350297, "grad_norm": 0.7023096680641174, "learning_rate": 2.8796331738220912e-05, "loss": 1.4324, "step": 943 }, { "epoch": 0.0621083277135385, "grad_norm": 0.7988854050636292, "learning_rate": 2.8699409957053535e-05, "loss": 1.6788, "step": 944 }, { "epoch": 0.06217412043357402, "grad_norm": 0.8351010084152222, "learning_rate": 2.860258586158454e-05, "loss": 1.6451, "step": 945 }, { "epoch": 0.062239913153609557, "grad_norm": 0.8499585390090942, "learning_rate": 2.8505859895854604e-05, "loss": 1.6087, "step": 946 }, { "epoch": 0.06230570587364508, "grad_norm": 0.9862839579582214, "learning_rate": 2.840923250345442e-05, "loss": 1.5487, "step": 947 }, { "epoch": 0.06237149859368061, "grad_norm": 0.9058715105056763, "learning_rate": 2.8312704127522655e-05, "loss": 1.3746, "step": 948 }, { "epoch": 0.062437291313716135, "grad_norm": 0.9685963988304138, "learning_rate": 2.821627521074383e-05, "loss": 1.4866, "step": 949 }, { "epoch": 0.06250308403375167, "grad_norm": 1.311897873878479, "learning_rate": 2.8119946195346375e-05, "loss": 1.5507, "step": 950 }, { "epoch": 0.0625688767537872, "grad_norm": 0.5097134113311768, "learning_rate": 2.8023717523100508e-05, "loss": 1.8342, "step": 951 }, { "epoch": 0.06263466947382272, "grad_norm": 0.5375131368637085, "learning_rate": 2.792758963531632e-05, "loss": 1.6841, "step": 952 }, { "epoch": 0.06270046219385825, "grad_norm": 0.6458749771118164, "learning_rate": 2.7831562972841696e-05, "loss": 2.0434, "step": 953 }, { "epoch": 0.06276625491389377, "grad_norm": 0.5497531890869141, "learning_rate": 2.773563797606028e-05, "loss": 1.6057, "step": 954 }, { "epoch": 0.0628320476339293, "grad_norm": 0.5604535341262817, "learning_rate": 2.7639815084889476e-05, "loss": 1.6338, "step": 955 }, { "epoch": 0.06289784035396484, "grad_norm": 0.6276973485946655, "learning_rate": 2.7544094738778436e-05, "loss": 1.7273, "step": 956 }, { "epoch": 0.06296363307400037, "grad_norm": 0.6032599210739136, "learning_rate": 2.744847737670601e-05, "loss": 1.6246, "step": 957 }, { "epoch": 0.06302942579403589, "grad_norm": 0.6191315650939941, "learning_rate": 2.7352963437178786e-05, "loss": 1.6568, "step": 958 }, { "epoch": 0.06309521851407142, "grad_norm": 0.5911609530448914, "learning_rate": 2.7257553358229034e-05, "loss": 1.6472, "step": 959 }, { "epoch": 0.06316101123410695, "grad_norm": 0.5993046164512634, "learning_rate": 2.7162247577412715e-05, "loss": 1.5908, "step": 960 }, { "epoch": 0.06322680395414247, "grad_norm": 0.6404312252998352, "learning_rate": 2.7067046531807494e-05, "loss": 1.6952, "step": 961 }, { "epoch": 0.063292596674178, "grad_norm": 0.6177670359611511, "learning_rate": 2.6971950658010666e-05, "loss": 1.5803, "step": 962 }, { "epoch": 0.06335838939421352, "grad_norm": 0.6164083480834961, "learning_rate": 
2.6876960392137217e-05, "loss": 1.5917, "step": 963 }, { "epoch": 0.06342418211424906, "grad_norm": 0.6648538708686829, "learning_rate": 2.678207616981787e-05, "loss": 1.6052, "step": 964 }, { "epoch": 0.06348997483428459, "grad_norm": 0.6657065153121948, "learning_rate": 2.6687298426196973e-05, "loss": 1.7399, "step": 965 }, { "epoch": 0.06355576755432012, "grad_norm": 0.6626632213592529, "learning_rate": 2.6592627595930542e-05, "loss": 1.6635, "step": 966 }, { "epoch": 0.06362156027435564, "grad_norm": 0.6235553622245789, "learning_rate": 2.6498064113184338e-05, "loss": 1.6467, "step": 967 }, { "epoch": 0.06368735299439117, "grad_norm": 0.6630668044090271, "learning_rate": 2.6403608411631742e-05, "loss": 1.6262, "step": 968 }, { "epoch": 0.0637531457144267, "grad_norm": 0.6901178956031799, "learning_rate": 2.6309260924451907e-05, "loss": 1.687, "step": 969 }, { "epoch": 0.06381893843446222, "grad_norm": 0.6554874777793884, "learning_rate": 2.62150220843277e-05, "loss": 1.7402, "step": 970 }, { "epoch": 0.06388473115449775, "grad_norm": 0.6539959907531738, "learning_rate": 2.612089232344371e-05, "loss": 1.7455, "step": 971 }, { "epoch": 0.06395052387453329, "grad_norm": 0.6740013360977173, "learning_rate": 2.602687207348429e-05, "loss": 1.6105, "step": 972 }, { "epoch": 0.06401631659456881, "grad_norm": 0.6463841199874878, "learning_rate": 2.593296176563157e-05, "loss": 1.5821, "step": 973 }, { "epoch": 0.06408210931460434, "grad_norm": 0.6570295691490173, "learning_rate": 2.5839161830563474e-05, "loss": 1.6883, "step": 974 }, { "epoch": 0.06414790203463987, "grad_norm": 0.6713987588882446, "learning_rate": 2.5745472698451767e-05, "loss": 1.5053, "step": 975 }, { "epoch": 0.0642136947546754, "grad_norm": 0.6906765103340149, "learning_rate": 2.565189479896005e-05, "loss": 1.5105, "step": 976 }, { "epoch": 0.06427948747471092, "grad_norm": 0.6981975436210632, "learning_rate": 2.555842856124182e-05, "loss": 1.5976, "step": 977 }, { "epoch": 0.06434528019474645, "grad_norm": 0.7114136815071106, "learning_rate": 2.546507441393845e-05, "loss": 1.6393, "step": 978 }, { "epoch": 0.06441107291478199, "grad_norm": 0.6741265058517456, "learning_rate": 2.5371832785177273e-05, "loss": 1.6601, "step": 979 }, { "epoch": 0.06447686563481751, "grad_norm": 0.7347429990768433, "learning_rate": 2.5278704102569662e-05, "loss": 1.6784, "step": 980 }, { "epoch": 0.06454265835485304, "grad_norm": 0.6604593992233276, "learning_rate": 2.518568879320895e-05, "loss": 1.5858, "step": 981 }, { "epoch": 0.06460845107488856, "grad_norm": 0.6967578530311584, "learning_rate": 2.509278728366855e-05, "loss": 1.5591, "step": 982 }, { "epoch": 0.06467424379492409, "grad_norm": 0.744243323802948, "learning_rate": 2.500000000000001e-05, "loss": 1.5185, "step": 983 }, { "epoch": 0.06474003651495962, "grad_norm": 0.7131791114807129, "learning_rate": 2.4907327367730963e-05, "loss": 1.5966, "step": 984 }, { "epoch": 0.06480582923499514, "grad_norm": 0.745911180973053, "learning_rate": 2.4814769811863313e-05, "loss": 1.6642, "step": 985 }, { "epoch": 0.06487162195503067, "grad_norm": 0.7129960656166077, "learning_rate": 2.472232775687119e-05, "loss": 1.4803, "step": 986 }, { "epoch": 0.06493741467506621, "grad_norm": 0.7604677677154541, "learning_rate": 2.463000162669903e-05, "loss": 1.4905, "step": 987 }, { "epoch": 0.06500320739510174, "grad_norm": 0.7828547358512878, "learning_rate": 2.453779184475964e-05, "loss": 1.5679, "step": 988 }, { "epoch": 0.06506900011513726, "grad_norm": 0.7184737324714661, "learning_rate": 
2.4445698833932234e-05, "loss": 1.4162, "step": 989 }, { "epoch": 0.06513479283517279, "grad_norm": 0.7497789263725281, "learning_rate": 2.4353723016560527e-05, "loss": 1.5783, "step": 990 }, { "epoch": 0.06520058555520832, "grad_norm": 0.7946657538414001, "learning_rate": 2.4261864814450758e-05, "loss": 1.5937, "step": 991 }, { "epoch": 0.06526637827524384, "grad_norm": 0.7872624397277832, "learning_rate": 2.417012464886978e-05, "loss": 1.5904, "step": 992 }, { "epoch": 0.06533217099527937, "grad_norm": 0.8094744682312012, "learning_rate": 2.4078502940543157e-05, "loss": 1.5454, "step": 993 }, { "epoch": 0.0653979637153149, "grad_norm": 0.9983581304550171, "learning_rate": 2.3987000109653134e-05, "loss": 1.4996, "step": 994 }, { "epoch": 0.06546375643535043, "grad_norm": 0.826250433921814, "learning_rate": 2.389561657583681e-05, "loss": 1.5753, "step": 995 }, { "epoch": 0.06552954915538596, "grad_norm": 0.8517383337020874, "learning_rate": 2.3804352758184223e-05, "loss": 1.5688, "step": 996 }, { "epoch": 0.06559534187542149, "grad_norm": 0.8434426784515381, "learning_rate": 2.3713209075236343e-05, "loss": 1.3069, "step": 997 }, { "epoch": 0.06566113459545701, "grad_norm": 0.9146838784217834, "learning_rate": 2.3622185944983188e-05, "loss": 1.4057, "step": 998 }, { "epoch": 0.06572692731549254, "grad_norm": 1.0109113454818726, "learning_rate": 2.3531283784861952e-05, "loss": 1.6269, "step": 999 }, { "epoch": 0.06579272003552807, "grad_norm": 1.2136893272399902, "learning_rate": 2.3440503011755e-05, "loss": 1.5136, "step": 1000 }, { "epoch": 0.06585851275556359, "grad_norm": 0.5091771483421326, "learning_rate": 2.3349844041988045e-05, "loss": 1.7648, "step": 1001 }, { "epoch": 0.06592430547559912, "grad_norm": 0.5724332928657532, "learning_rate": 2.325930729132821e-05, "loss": 1.8152, "step": 1002 }, { "epoch": 0.06599009819563466, "grad_norm": 0.5698429942131042, "learning_rate": 2.3168893174982098e-05, "loss": 1.6787, "step": 1003 }, { "epoch": 0.06605589091567018, "grad_norm": 0.5834808945655823, "learning_rate": 2.30786021075939e-05, "loss": 1.6803, "step": 1004 }, { "epoch": 0.06612168363570571, "grad_norm": 0.588674008846283, "learning_rate": 2.2988434503243516e-05, "loss": 1.5799, "step": 1005 }, { "epoch": 0.06618747635574124, "grad_norm": 0.6044074296951294, "learning_rate": 2.289839077544463e-05, "loss": 1.7156, "step": 1006 }, { "epoch": 0.06625326907577676, "grad_norm": 0.6402976512908936, "learning_rate": 2.28084713371428e-05, "loss": 1.7041, "step": 1007 }, { "epoch": 0.06631906179581229, "grad_norm": 0.5715565085411072, "learning_rate": 2.2718676600713622e-05, "loss": 1.5912, "step": 1008 }, { "epoch": 0.06638485451584782, "grad_norm": 0.6274817585945129, "learning_rate": 2.2629006977960798e-05, "loss": 1.6121, "step": 1009 }, { "epoch": 0.06645064723588336, "grad_norm": 0.5526905655860901, "learning_rate": 2.2539462880114194e-05, "loss": 1.5085, "step": 1010 }, { "epoch": 0.06651643995591888, "grad_norm": 0.6097638010978699, "learning_rate": 2.245004471782806e-05, "loss": 1.5009, "step": 1011 }, { "epoch": 0.06658223267595441, "grad_norm": 0.6747389435768127, "learning_rate": 2.236075290117914e-05, "loss": 1.6578, "step": 1012 }, { "epoch": 0.06664802539598993, "grad_norm": 0.6369504928588867, "learning_rate": 2.2271587839664672e-05, "loss": 1.5528, "step": 1013 }, { "epoch": 0.06671381811602546, "grad_norm": 0.6106559038162231, "learning_rate": 2.218254994220062e-05, "loss": 1.5907, "step": 1014 }, { "epoch": 0.06677961083606099, "grad_norm": 0.5885758399963379, 
"learning_rate": 2.2093639617119794e-05, "loss": 1.513, "step": 1015 }, { "epoch": 0.06684540355609651, "grad_norm": 0.6991572380065918, "learning_rate": 2.2004857272169876e-05, "loss": 1.823, "step": 1016 }, { "epoch": 0.06691119627613204, "grad_norm": 0.6160642504692078, "learning_rate": 2.1916203314511692e-05, "loss": 1.6389, "step": 1017 }, { "epoch": 0.06697698899616758, "grad_norm": 0.6292829513549805, "learning_rate": 2.1827678150717256e-05, "loss": 1.6663, "step": 1018 }, { "epoch": 0.0670427817162031, "grad_norm": 0.6121276617050171, "learning_rate": 2.1739282186767923e-05, "loss": 1.5885, "step": 1019 }, { "epoch": 0.06710857443623863, "grad_norm": 0.6440593004226685, "learning_rate": 2.165101582805254e-05, "loss": 1.6319, "step": 1020 }, { "epoch": 0.06717436715627416, "grad_norm": 0.636544942855835, "learning_rate": 2.1562879479365556e-05, "loss": 1.644, "step": 1021 }, { "epoch": 0.06724015987630969, "grad_norm": 0.6443448662757874, "learning_rate": 2.1474873544905205e-05, "loss": 1.5995, "step": 1022 }, { "epoch": 0.06730595259634521, "grad_norm": 0.6514659523963928, "learning_rate": 2.1386998428271633e-05, "loss": 1.554, "step": 1023 }, { "epoch": 0.06737174531638074, "grad_norm": 0.6635521650314331, "learning_rate": 2.129925453246504e-05, "loss": 1.7461, "step": 1024 }, { "epoch": 0.06743753803641626, "grad_norm": 0.6211679577827454, "learning_rate": 2.1211642259883867e-05, "loss": 1.4678, "step": 1025 }, { "epoch": 0.0675033307564518, "grad_norm": 0.668057918548584, "learning_rate": 2.1124162012322862e-05, "loss": 1.6624, "step": 1026 }, { "epoch": 0.06756912347648733, "grad_norm": 0.6626843810081482, "learning_rate": 2.1036814190971333e-05, "loss": 1.5244, "step": 1027 }, { "epoch": 0.06763491619652286, "grad_norm": 0.6613166332244873, "learning_rate": 2.0949599196411325e-05, "loss": 1.497, "step": 1028 }, { "epoch": 0.06770070891655838, "grad_norm": 0.6597977876663208, "learning_rate": 2.086251742861565e-05, "loss": 1.711, "step": 1029 }, { "epoch": 0.06776650163659391, "grad_norm": 0.7360920310020447, "learning_rate": 2.077556928694619e-05, "loss": 1.6281, "step": 1030 }, { "epoch": 0.06783229435662944, "grad_norm": 0.6604809761047363, "learning_rate": 2.0688755170151996e-05, "loss": 1.6061, "step": 1031 }, { "epoch": 0.06789808707666496, "grad_norm": 0.6945058703422546, "learning_rate": 2.0602075476367432e-05, "loss": 1.6064, "step": 1032 }, { "epoch": 0.06796387979670049, "grad_norm": 0.71868896484375, "learning_rate": 2.051553060311045e-05, "loss": 1.56, "step": 1033 }, { "epoch": 0.06802967251673603, "grad_norm": 0.6665169596672058, "learning_rate": 2.0429120947280678e-05, "loss": 1.5953, "step": 1034 }, { "epoch": 0.06809546523677155, "grad_norm": 0.6772186756134033, "learning_rate": 2.0342846905157636e-05, "loss": 1.5658, "step": 1035 }, { "epoch": 0.06816125795680708, "grad_norm": 0.7677963376045227, "learning_rate": 2.0256708872398915e-05, "loss": 1.7299, "step": 1036 }, { "epoch": 0.06822705067684261, "grad_norm": 0.7391518950462341, "learning_rate": 2.017070724403835e-05, "loss": 1.6122, "step": 1037 }, { "epoch": 0.06829284339687813, "grad_norm": 0.8133260607719421, "learning_rate": 2.0084842414484222e-05, "loss": 1.5689, "step": 1038 }, { "epoch": 0.06835863611691366, "grad_norm": 0.7881767749786377, "learning_rate": 1.999911477751746e-05, "loss": 1.673, "step": 1039 }, { "epoch": 0.06842442883694919, "grad_norm": 0.7791821360588074, "learning_rate": 1.9913524726289784e-05, "loss": 1.7744, "step": 1040 }, { "epoch": 0.06849022155698473, "grad_norm": 
0.7703235745429993, "learning_rate": 1.9828072653321978e-05, "loss": 1.4325, "step": 1041 }, { "epoch": 0.06855601427702025, "grad_norm": 0.7927573323249817, "learning_rate": 1.9742758950502045e-05, "loss": 1.6792, "step": 1042 }, { "epoch": 0.06862180699705578, "grad_norm": 0.7753776907920837, "learning_rate": 1.965758400908334e-05, "loss": 1.4926, "step": 1043 }, { "epoch": 0.0686875997170913, "grad_norm": 0.8299908638000488, "learning_rate": 1.957254821968298e-05, "loss": 1.743, "step": 1044 }, { "epoch": 0.06875339243712683, "grad_norm": 0.754217267036438, "learning_rate": 1.948765197227983e-05, "loss": 1.4112, "step": 1045 }, { "epoch": 0.06881918515716236, "grad_norm": 0.7843993902206421, "learning_rate": 1.9402895656212833e-05, "loss": 1.4893, "step": 1046 }, { "epoch": 0.06888497787719788, "grad_norm": 0.8628965616226196, "learning_rate": 1.9318279660179217e-05, "loss": 1.5237, "step": 1047 }, { "epoch": 0.06895077059723341, "grad_norm": 0.9835182428359985, "learning_rate": 1.9233804372232643e-05, "loss": 1.5732, "step": 1048 }, { "epoch": 0.06901656331726895, "grad_norm": 1.099034309387207, "learning_rate": 1.914947017978153e-05, "loss": 1.5774, "step": 1049 }, { "epoch": 0.06908235603730448, "grad_norm": 1.265041470527649, "learning_rate": 1.9065277469587204e-05, "loss": 1.3189, "step": 1050 }, { "epoch": 0.06914814875734, "grad_norm": 0.5150356888771057, "learning_rate": 1.8981226627762143e-05, "loss": 1.8932, "step": 1051 }, { "epoch": 0.06921394147737553, "grad_norm": 0.5652185678482056, "learning_rate": 1.889731803976822e-05, "loss": 1.8346, "step": 1052 }, { "epoch": 0.06927973419741106, "grad_norm": 0.5585607886314392, "learning_rate": 1.881355209041491e-05, "loss": 1.7214, "step": 1053 }, { "epoch": 0.06934552691744658, "grad_norm": 0.5780192613601685, "learning_rate": 1.8729929163857552e-05, "loss": 1.6242, "step": 1054 }, { "epoch": 0.06941131963748211, "grad_norm": 0.5689457654953003, "learning_rate": 1.8646449643595565e-05, "loss": 1.6942, "step": 1055 }, { "epoch": 0.06947711235751763, "grad_norm": 0.5897836685180664, "learning_rate": 1.8563113912470702e-05, "loss": 1.6164, "step": 1056 }, { "epoch": 0.06954290507755317, "grad_norm": 0.5599413514137268, "learning_rate": 1.847992235266529e-05, "loss": 1.5614, "step": 1057 }, { "epoch": 0.0696086977975887, "grad_norm": 0.5642629861831665, "learning_rate": 1.8396875345700497e-05, "loss": 1.54, "step": 1058 }, { "epoch": 0.06967449051762423, "grad_norm": 0.6085110902786255, "learning_rate": 1.8313973272434504e-05, "loss": 1.6037, "step": 1059 }, { "epoch": 0.06974028323765975, "grad_norm": 0.5913036465644836, "learning_rate": 1.8231216513060893e-05, "loss": 1.6223, "step": 1060 }, { "epoch": 0.06980607595769528, "grad_norm": 0.5918750166893005, "learning_rate": 1.8148605447106797e-05, "loss": 1.5262, "step": 1061 }, { "epoch": 0.0698718686777308, "grad_norm": 0.5838180184364319, "learning_rate": 1.8066140453431173e-05, "loss": 1.6248, "step": 1062 }, { "epoch": 0.06993766139776633, "grad_norm": 0.6557063460350037, "learning_rate": 1.798382191022313e-05, "loss": 1.7616, "step": 1063 }, { "epoch": 0.07000345411780187, "grad_norm": 0.6090285181999207, "learning_rate": 1.7901650195000068e-05, "loss": 1.6591, "step": 1064 }, { "epoch": 0.0700692468378374, "grad_norm": 0.6101527214050293, "learning_rate": 1.7819625684606102e-05, "loss": 1.5755, "step": 1065 }, { "epoch": 0.07013503955787292, "grad_norm": 0.5960958003997803, "learning_rate": 1.7737748755210214e-05, "loss": 1.5059, "step": 1066 }, { "epoch": 0.07020083227790845, 
"grad_norm": 0.631040096282959, "learning_rate": 1.76560197823046e-05, "loss": 1.5921, "step": 1067 }, { "epoch": 0.07026662499794398, "grad_norm": 0.6434530019760132, "learning_rate": 1.7574439140702902e-05, "loss": 1.5911, "step": 1068 }, { "epoch": 0.0703324177179795, "grad_norm": 0.6554668545722961, "learning_rate": 1.7493007204538503e-05, "loss": 1.7435, "step": 1069 }, { "epoch": 0.07039821043801503, "grad_norm": 0.6489726901054382, "learning_rate": 1.7411724347262824e-05, "loss": 1.7055, "step": 1070 }, { "epoch": 0.07046400315805056, "grad_norm": 0.628710150718689, "learning_rate": 1.7330590941643603e-05, "loss": 1.5092, "step": 1071 }, { "epoch": 0.0705297958780861, "grad_norm": 0.6012537479400635, "learning_rate": 1.7249607359763174e-05, "loss": 1.4466, "step": 1072 }, { "epoch": 0.07059558859812162, "grad_norm": 0.6696065664291382, "learning_rate": 1.7168773973016776e-05, "loss": 1.5727, "step": 1073 }, { "epoch": 0.07066138131815715, "grad_norm": 0.6296167373657227, "learning_rate": 1.7088091152110873e-05, "loss": 1.5393, "step": 1074 }, { "epoch": 0.07072717403819268, "grad_norm": 0.6769093871116638, "learning_rate": 1.7007559267061334e-05, "loss": 1.6523, "step": 1075 }, { "epoch": 0.0707929667582282, "grad_norm": 0.6519213914871216, "learning_rate": 1.692717868719195e-05, "loss": 1.5566, "step": 1076 }, { "epoch": 0.07085875947826373, "grad_norm": 0.6642208695411682, "learning_rate": 1.6846949781132548e-05, "loss": 1.5886, "step": 1077 }, { "epoch": 0.07092455219829925, "grad_norm": 0.6803880333900452, "learning_rate": 1.6766872916817377e-05, "loss": 1.6519, "step": 1078 }, { "epoch": 0.07099034491833478, "grad_norm": 0.7287022471427917, "learning_rate": 1.668694846148343e-05, "loss": 1.7061, "step": 1079 }, { "epoch": 0.07105613763837032, "grad_norm": 0.6571015119552612, "learning_rate": 1.660717678166871e-05, "loss": 1.5716, "step": 1080 }, { "epoch": 0.07112193035840585, "grad_norm": 0.6374773383140564, "learning_rate": 1.6527558243210623e-05, "loss": 1.455, "step": 1081 }, { "epoch": 0.07118772307844137, "grad_norm": 0.699365496635437, "learning_rate": 1.644809321124423e-05, "loss": 1.6404, "step": 1082 }, { "epoch": 0.0712535157984769, "grad_norm": 0.6866129636764526, "learning_rate": 1.6368782050200633e-05, "loss": 1.4377, "step": 1083 }, { "epoch": 0.07131930851851243, "grad_norm": 0.6566864252090454, "learning_rate": 1.6289625123805245e-05, "loss": 1.481, "step": 1084 }, { "epoch": 0.07138510123854795, "grad_norm": 0.7273065447807312, "learning_rate": 1.621062279507617e-05, "loss": 1.6732, "step": 1085 }, { "epoch": 0.07145089395858348, "grad_norm": 0.7388779520988464, "learning_rate": 1.6131775426322503e-05, "loss": 1.5516, "step": 1086 }, { "epoch": 0.071516686678619, "grad_norm": 0.7159716486930847, "learning_rate": 1.60530833791427e-05, "loss": 1.5815, "step": 1087 }, { "epoch": 0.07158247939865454, "grad_norm": 0.7883169054985046, "learning_rate": 1.597454701442288e-05, "loss": 1.5699, "step": 1088 }, { "epoch": 0.07164827211869007, "grad_norm": 0.7535433769226074, "learning_rate": 1.589616669233522e-05, "loss": 1.71, "step": 1089 }, { "epoch": 0.0717140648387256, "grad_norm": 0.7542433142662048, "learning_rate": 1.581794277233628e-05, "loss": 1.6412, "step": 1090 }, { "epoch": 0.07177985755876112, "grad_norm": 0.7680163383483887, "learning_rate": 1.5739875613165283e-05, "loss": 1.5868, "step": 1091 }, { "epoch": 0.07184565027879665, "grad_norm": 0.8039641380310059, "learning_rate": 1.566196557284264e-05, "loss": 1.6165, "step": 1092 }, { "epoch": 
0.07191144299883218, "grad_norm": 0.8360242247581482, "learning_rate": 1.5584213008668147e-05, "loss": 1.4836, "step": 1093 }, { "epoch": 0.0719772357188677, "grad_norm": 0.9569554328918457, "learning_rate": 1.550661827721941e-05, "loss": 1.7255, "step": 1094 }, { "epoch": 0.07204302843890324, "grad_norm": 0.8485451340675354, "learning_rate": 1.5429181734350236e-05, "loss": 1.6198, "step": 1095 }, { "epoch": 0.07210882115893877, "grad_norm": 0.8728616237640381, "learning_rate": 1.53519037351889e-05, "loss": 1.6094, "step": 1096 }, { "epoch": 0.0721746138789743, "grad_norm": 0.9522159695625305, "learning_rate": 1.527478463413666e-05, "loss": 1.3929, "step": 1097 }, { "epoch": 0.07224040659900982, "grad_norm": 1.0558445453643799, "learning_rate": 1.5197824784866015e-05, "loss": 1.7511, "step": 1098 }, { "epoch": 0.07230619931904535, "grad_norm": 1.0129352807998657, "learning_rate": 1.5121024540319161e-05, "loss": 1.4986, "step": 1099 }, { "epoch": 0.07237199203908087, "grad_norm": 1.232009768486023, "learning_rate": 1.5044384252706312e-05, "loss": 1.2485, "step": 1100 }, { "epoch": 0.0724377847591164, "grad_norm": 0.5130254626274109, "learning_rate": 1.4967904273504113e-05, "loss": 1.816, "step": 1101 }, { "epoch": 0.07250357747915193, "grad_norm": 0.5423897504806519, "learning_rate": 1.4891584953454036e-05, "loss": 1.7201, "step": 1102 }, { "epoch": 0.07256937019918747, "grad_norm": 0.5203692317008972, "learning_rate": 1.4815426642560754e-05, "loss": 1.5166, "step": 1103 }, { "epoch": 0.07263516291922299, "grad_norm": 0.5805792212486267, "learning_rate": 1.4739429690090533e-05, "loss": 1.6115, "step": 1104 }, { "epoch": 0.07270095563925852, "grad_norm": 0.5422717928886414, "learning_rate": 1.4663594444569667e-05, "loss": 1.5951, "step": 1105 }, { "epoch": 0.07276674835929405, "grad_norm": 0.5676501989364624, "learning_rate": 1.4587921253782849e-05, "loss": 1.6266, "step": 1106 }, { "epoch": 0.07283254107932957, "grad_norm": 0.5662168264389038, "learning_rate": 1.4512410464771514e-05, "loss": 1.6486, "step": 1107 }, { "epoch": 0.0728983337993651, "grad_norm": 0.5962639451026917, "learning_rate": 1.4437062423832426e-05, "loss": 1.8598, "step": 1108 }, { "epoch": 0.07296412651940062, "grad_norm": 0.6214326620101929, "learning_rate": 1.4361877476515889e-05, "loss": 1.5881, "step": 1109 }, { "epoch": 0.07302991923943615, "grad_norm": 0.5972443222999573, "learning_rate": 1.428685596762429e-05, "loss": 1.6234, "step": 1110 }, { "epoch": 0.07309571195947169, "grad_norm": 0.5804590582847595, "learning_rate": 1.4211998241210484e-05, "loss": 1.7503, "step": 1111 }, { "epoch": 0.07316150467950722, "grad_norm": 0.6213569045066833, "learning_rate": 1.413730464057616e-05, "loss": 1.6576, "step": 1112 }, { "epoch": 0.07322729739954274, "grad_norm": 0.6171157360076904, "learning_rate": 1.406277550827037e-05, "loss": 1.6889, "step": 1113 }, { "epoch": 0.07329309011957827, "grad_norm": 0.63736891746521, "learning_rate": 1.3988411186087885e-05, "loss": 1.5542, "step": 1114 }, { "epoch": 0.0733588828396138, "grad_norm": 0.6111243367195129, "learning_rate": 1.3914212015067651e-05, "loss": 1.6043, "step": 1115 }, { "epoch": 0.07342467555964932, "grad_norm": 0.6354307532310486, "learning_rate": 1.3840178335491222e-05, "loss": 1.5426, "step": 1116 }, { "epoch": 0.07349046827968485, "grad_norm": 0.6276394128799438, "learning_rate": 1.376631048688119e-05, "loss": 1.5725, "step": 1117 }, { "epoch": 0.07355626099972037, "grad_norm": 0.6149314641952515, "learning_rate": 1.3692608807999652e-05, "loss": 1.6483, "step": 
1118 }, { "epoch": 0.07362205371975591, "grad_norm": 0.6823477745056152, "learning_rate": 1.3619073636846625e-05, "loss": 1.6028, "step": 1119 }, { "epoch": 0.07368784643979144, "grad_norm": 0.6506856083869934, "learning_rate": 1.3545705310658529e-05, "loss": 1.5915, "step": 1120 }, { "epoch": 0.07375363915982697, "grad_norm": 0.6432639956474304, "learning_rate": 1.3472504165906613e-05, "loss": 1.5872, "step": 1121 }, { "epoch": 0.0738194318798625, "grad_norm": 0.6213419437408447, "learning_rate": 1.3399470538295434e-05, "loss": 1.5422, "step": 1122 }, { "epoch": 0.07388522459989802, "grad_norm": 0.6513727903366089, "learning_rate": 1.3326604762761258e-05, "loss": 1.6088, "step": 1123 }, { "epoch": 0.07395101731993355, "grad_norm": 0.6472317576408386, "learning_rate": 1.3253907173470648e-05, "loss": 1.5638, "step": 1124 }, { "epoch": 0.07401681003996907, "grad_norm": 0.6375536322593689, "learning_rate": 1.3181378103818814e-05, "loss": 1.4387, "step": 1125 }, { "epoch": 0.07408260276000461, "grad_norm": 0.6542819738388062, "learning_rate": 1.3109017886428122e-05, "loss": 1.5019, "step": 1126 }, { "epoch": 0.07414839548004014, "grad_norm": 0.6725215911865234, "learning_rate": 1.30368268531466e-05, "loss": 1.5493, "step": 1127 }, { "epoch": 0.07421418820007566, "grad_norm": 0.695514976978302, "learning_rate": 1.2964805335046332e-05, "loss": 1.7503, "step": 1128 }, { "epoch": 0.07427998092011119, "grad_norm": 0.6565685868263245, "learning_rate": 1.2892953662422047e-05, "loss": 1.4902, "step": 1129 }, { "epoch": 0.07434577364014672, "grad_norm": 0.7066967487335205, "learning_rate": 1.2821272164789544e-05, "loss": 1.5197, "step": 1130 }, { "epoch": 0.07441156636018224, "grad_norm": 0.686930239200592, "learning_rate": 1.2749761170884179e-05, "loss": 1.596, "step": 1131 }, { "epoch": 0.07447735908021777, "grad_norm": 0.7322065830230713, "learning_rate": 1.2678421008659375e-05, "loss": 1.5857, "step": 1132 }, { "epoch": 0.0745431518002533, "grad_norm": 0.6980546116828918, "learning_rate": 1.2607252005285109e-05, "loss": 1.5733, "step": 1133 }, { "epoch": 0.07460894452028884, "grad_norm": 0.7424272298812866, "learning_rate": 1.2536254487146415e-05, "loss": 1.6048, "step": 1134 }, { "epoch": 0.07467473724032436, "grad_norm": 0.708510160446167, "learning_rate": 1.2465428779841882e-05, "loss": 1.5323, "step": 1135 }, { "epoch": 0.07474052996035989, "grad_norm": 0.7060134410858154, "learning_rate": 1.2394775208182174e-05, "loss": 1.5616, "step": 1136 }, { "epoch": 0.07480632268039542, "grad_norm": 0.7687208652496338, "learning_rate": 1.2324294096188526e-05, "loss": 1.6314, "step": 1137 }, { "epoch": 0.07487211540043094, "grad_norm": 0.7260802388191223, "learning_rate": 1.2253985767091274e-05, "loss": 1.5503, "step": 1138 }, { "epoch": 0.07493790812046647, "grad_norm": 0.7328141331672668, "learning_rate": 1.2183850543328312e-05, "loss": 1.5422, "step": 1139 }, { "epoch": 0.075003700840502, "grad_norm": 0.7931233048439026, "learning_rate": 1.2113888746543738e-05, "loss": 1.4909, "step": 1140 }, { "epoch": 0.07506949356053752, "grad_norm": 0.7329046130180359, "learning_rate": 1.2044100697586263e-05, "loss": 1.4206, "step": 1141 }, { "epoch": 0.07513528628057306, "grad_norm": 0.8105408549308777, "learning_rate": 1.1974486716507783e-05, "loss": 1.6199, "step": 1142 }, { "epoch": 0.07520107900060859, "grad_norm": 0.8885856866836548, "learning_rate": 1.1905047122561924e-05, "loss": 1.7629, "step": 1143 }, { "epoch": 0.07526687172064411, "grad_norm": 0.8523672223091125, "learning_rate": 1.1835782234202525e-05, 
"loss": 1.505, "step": 1144 }, { "epoch": 0.07533266444067964, "grad_norm": 0.8462732434272766, "learning_rate": 1.1766692369082255e-05, "loss": 1.5047, "step": 1145 }, { "epoch": 0.07539845716071517, "grad_norm": 0.8410788178443909, "learning_rate": 1.1697777844051105e-05, "loss": 1.5502, "step": 1146 }, { "epoch": 0.07546424988075069, "grad_norm": 0.8786489367485046, "learning_rate": 1.1629038975154943e-05, "loss": 1.4928, "step": 1147 }, { "epoch": 0.07553004260078622, "grad_norm": 0.9621903896331787, "learning_rate": 1.156047607763407e-05, "loss": 1.4004, "step": 1148 }, { "epoch": 0.07559583532082174, "grad_norm": 1.068969488143921, "learning_rate": 1.1492089465921768e-05, "loss": 1.5004, "step": 1149 }, { "epoch": 0.07566162804085728, "grad_norm": 1.3039774894714355, "learning_rate": 1.1423879453642878e-05, "loss": 1.5061, "step": 1150 }, { "epoch": 0.07572742076089281, "grad_norm": 0.5217190384864807, "learning_rate": 1.135584635361232e-05, "loss": 1.8168, "step": 1151 }, { "epoch": 0.07579321348092834, "grad_norm": 0.5663981437683105, "learning_rate": 1.128799047783371e-05, "loss": 1.7287, "step": 1152 }, { "epoch": 0.07585900620096386, "grad_norm": 0.5568148493766785, "learning_rate": 1.122031213749789e-05, "loss": 1.611, "step": 1153 }, { "epoch": 0.07592479892099939, "grad_norm": 0.6089836359024048, "learning_rate": 1.115281164298153e-05, "loss": 1.5694, "step": 1154 }, { "epoch": 0.07599059164103492, "grad_norm": 0.5871589183807373, "learning_rate": 1.1085489303845637e-05, "loss": 1.6517, "step": 1155 }, { "epoch": 0.07605638436107044, "grad_norm": 0.5930696725845337, "learning_rate": 1.101834542883427e-05, "loss": 1.6933, "step": 1156 }, { "epoch": 0.07612217708110598, "grad_norm": 0.5683667659759521, "learning_rate": 1.0951380325872979e-05, "loss": 1.6172, "step": 1157 }, { "epoch": 0.07618796980114151, "grad_norm": 0.5930812954902649, "learning_rate": 1.088459430206748e-05, "loss": 1.7436, "step": 1158 }, { "epoch": 0.07625376252117703, "grad_norm": 0.5984984040260315, "learning_rate": 1.0817987663702229e-05, "loss": 1.8179, "step": 1159 }, { "epoch": 0.07631955524121256, "grad_norm": 0.6047726273536682, "learning_rate": 1.0751560716238967e-05, "loss": 1.5642, "step": 1160 }, { "epoch": 0.07638534796124809, "grad_norm": 0.6473960280418396, "learning_rate": 1.0685313764315413e-05, "loss": 1.7192, "step": 1161 }, { "epoch": 0.07645114068128361, "grad_norm": 0.6276887059211731, "learning_rate": 1.0619247111743797e-05, "loss": 1.532, "step": 1162 }, { "epoch": 0.07651693340131914, "grad_norm": 0.6259212493896484, "learning_rate": 1.055336106150948e-05, "loss": 1.5419, "step": 1163 }, { "epoch": 0.07658272612135467, "grad_norm": 0.6881235241889954, "learning_rate": 1.048765591576959e-05, "loss": 1.7839, "step": 1164 }, { "epoch": 0.0766485188413902, "grad_norm": 0.6311882138252258, "learning_rate": 1.0422131975851584e-05, "loss": 1.5377, "step": 1165 }, { "epoch": 0.07671431156142573, "grad_norm": 0.6650845408439636, "learning_rate": 1.0356789542251938e-05, "loss": 1.7409, "step": 1166 }, { "epoch": 0.07678010428146126, "grad_norm": 0.6411380767822266, "learning_rate": 1.0291628914634694e-05, "loss": 1.6915, "step": 1167 }, { "epoch": 0.07684589700149679, "grad_norm": 0.6582992672920227, "learning_rate": 1.022665039183015e-05, "loss": 1.5401, "step": 1168 }, { "epoch": 0.07691168972153231, "grad_norm": 0.6362737417221069, "learning_rate": 1.0161854271833443e-05, "loss": 1.5978, "step": 1169 }, { "epoch": 0.07697748244156784, "grad_norm": 0.6066959500312805, "learning_rate": 
1.009724085180322e-05, "loss": 1.6114, "step": 1170 }, { "epoch": 0.07704327516160336, "grad_norm": 0.6722879409790039, "learning_rate": 1.0032810428060218e-05, "loss": 1.597, "step": 1171 }, { "epoch": 0.07710906788163889, "grad_norm": 0.6590387225151062, "learning_rate": 9.96856329608597e-06, "loss": 1.5352, "step": 1172 }, { "epoch": 0.07717486060167443, "grad_norm": 0.6803810000419617, "learning_rate": 9.90449975052144e-06, "loss": 1.5224, "step": 1173 }, { "epoch": 0.07724065332170996, "grad_norm": 0.6694747805595398, "learning_rate": 9.840620085165626e-06, "loss": 1.7062, "step": 1174 }, { "epoch": 0.07730644604174548, "grad_norm": 0.672380268573761, "learning_rate": 9.776924592974256e-06, "loss": 1.6693, "step": 1175 }, { "epoch": 0.07737223876178101, "grad_norm": 0.6489235162734985, "learning_rate": 9.713413566058405e-06, "loss": 1.5843, "step": 1176 }, { "epoch": 0.07743803148181654, "grad_norm": 0.6905193328857422, "learning_rate": 9.650087295683202e-06, "loss": 1.593, "step": 1177 }, { "epoch": 0.07750382420185206, "grad_norm": 0.7134239673614502, "learning_rate": 9.586946072266478e-06, "loss": 1.5109, "step": 1178 }, { "epoch": 0.07756961692188759, "grad_norm": 0.6946320533752441, "learning_rate": 9.523990185377413e-06, "loss": 1.5549, "step": 1179 }, { "epoch": 0.07763540964192311, "grad_norm": 0.6910642981529236, "learning_rate": 9.461219923735227e-06, "loss": 1.7573, "step": 1180 }, { "epoch": 0.07770120236195865, "grad_norm": 0.6611814498901367, "learning_rate": 9.398635575207854e-06, "loss": 1.5795, "step": 1181 }, { "epoch": 0.07776699508199418, "grad_norm": 0.6925032734870911, "learning_rate": 9.336237426810624e-06, "loss": 1.6087, "step": 1182 }, { "epoch": 0.07783278780202971, "grad_norm": 0.7224239706993103, "learning_rate": 9.274025764704936e-06, "loss": 1.5171, "step": 1183 }, { "epoch": 0.07789858052206523, "grad_norm": 0.696713387966156, "learning_rate": 9.212000874196953e-06, "loss": 1.4585, "step": 1184 }, { "epoch": 0.07796437324210076, "grad_norm": 0.7397194504737854, "learning_rate": 9.150163039736297e-06, "loss": 1.551, "step": 1185 }, { "epoch": 0.07803016596213629, "grad_norm": 0.696258008480072, "learning_rate": 9.08851254491475e-06, "loss": 1.5841, "step": 1186 }, { "epoch": 0.07809595868217181, "grad_norm": 0.7256150841712952, "learning_rate": 9.027049672464916e-06, "loss": 1.6002, "step": 1187 }, { "epoch": 0.07816175140220735, "grad_norm": 0.7567223906517029, "learning_rate": 8.965774704258956e-06, "loss": 1.6436, "step": 1188 }, { "epoch": 0.07822754412224288, "grad_norm": 0.7757462859153748, "learning_rate": 8.90468792130733e-06, "loss": 1.7119, "step": 1189 }, { "epoch": 0.0782933368422784, "grad_norm": 0.7670205235481262, "learning_rate": 8.843789603757446e-06, "loss": 1.6326, "step": 1190 }, { "epoch": 0.07835912956231393, "grad_norm": 0.8732237219810486, "learning_rate": 8.783080030892394e-06, "loss": 1.415, "step": 1191 }, { "epoch": 0.07842492228234946, "grad_norm": 0.8682703375816345, "learning_rate": 8.72255948112966e-06, "loss": 1.629, "step": 1192 }, { "epoch": 0.07849071500238498, "grad_norm": 0.7908897399902344, "learning_rate": 8.662228232019876e-06, "loss": 1.3307, "step": 1193 }, { "epoch": 0.07855650772242051, "grad_norm": 0.8436971306800842, "learning_rate": 8.602086560245537e-06, "loss": 1.5682, "step": 1194 }, { "epoch": 0.07862230044245604, "grad_norm": 0.8712666630744934, "learning_rate": 8.542134741619711e-06, "loss": 1.5686, "step": 1195 }, { "epoch": 0.07868809316249158, "grad_norm": 0.8938828110694885, "learning_rate": 
8.48237305108479e-06, "loss": 1.5938, "step": 1196 }, { "epoch": 0.0787538858825271, "grad_norm": 0.8834605813026428, "learning_rate": 8.422801762711247e-06, "loss": 1.4887, "step": 1197 }, { "epoch": 0.07881967860256263, "grad_norm": 1.0207496881484985, "learning_rate": 8.363421149696332e-06, "loss": 1.576, "step": 1198 }, { "epoch": 0.07888547132259816, "grad_norm": 1.0710493326187134, "learning_rate": 8.304231484362868e-06, "loss": 1.6295, "step": 1199 }, { "epoch": 0.07895126404263368, "grad_norm": 1.221801996231079, "learning_rate": 8.245233038157962e-06, "loss": 1.6023, "step": 1200 }, { "epoch": 0.07901705676266921, "grad_norm": 0.517112672328949, "learning_rate": 8.186426081651804e-06, "loss": 1.6261, "step": 1201 }, { "epoch": 0.07908284948270473, "grad_norm": 0.5203869938850403, "learning_rate": 8.127810884536403e-06, "loss": 1.6803, "step": 1202 }, { "epoch": 0.07914864220274026, "grad_norm": 0.526978611946106, "learning_rate": 8.069387715624294e-06, "loss": 1.6658, "step": 1203 }, { "epoch": 0.0792144349227758, "grad_norm": 0.5750939249992371, "learning_rate": 8.011156842847412e-06, "loss": 1.6741, "step": 1204 }, { "epoch": 0.07928022764281133, "grad_norm": 0.5610478520393372, "learning_rate": 7.95311853325582e-06, "loss": 1.6639, "step": 1205 }, { "epoch": 0.07934602036284685, "grad_norm": 0.5532097816467285, "learning_rate": 7.89527305301645e-06, "loss": 1.6807, "step": 1206 }, { "epoch": 0.07941181308288238, "grad_norm": 0.5609330534934998, "learning_rate": 7.83762066741191e-06, "loss": 1.701, "step": 1207 }, { "epoch": 0.0794776058029179, "grad_norm": 0.5949382781982422, "learning_rate": 7.780161640839257e-06, "loss": 1.7165, "step": 1208 }, { "epoch": 0.07954339852295343, "grad_norm": 0.5886333584785461, "learning_rate": 7.722896236808807e-06, "loss": 1.5563, "step": 1209 }, { "epoch": 0.07960919124298896, "grad_norm": 0.6132276654243469, "learning_rate": 7.665824717942915e-06, "loss": 1.7714, "step": 1210 }, { "epoch": 0.0796749839630245, "grad_norm": 0.6169953942298889, "learning_rate": 7.60894734597476e-06, "loss": 1.6546, "step": 1211 }, { "epoch": 0.07974077668306002, "grad_norm": 0.6119076609611511, "learning_rate": 7.552264381747148e-06, "loss": 1.6585, "step": 1212 }, { "epoch": 0.07980656940309555, "grad_norm": 0.6173310279846191, "learning_rate": 7.495776085211331e-06, "loss": 1.4996, "step": 1213 }, { "epoch": 0.07987236212313108, "grad_norm": 0.590090811252594, "learning_rate": 7.439482715425805e-06, "loss": 1.5205, "step": 1214 }, { "epoch": 0.0799381548431666, "grad_norm": 0.6101630926132202, "learning_rate": 7.383384530555104e-06, "loss": 1.681, "step": 1215 }, { "epoch": 0.08000394756320213, "grad_norm": 0.6025815010070801, "learning_rate": 7.327481787868646e-06, "loss": 1.5154, "step": 1216 }, { "epoch": 0.08006974028323766, "grad_norm": 0.6469805240631104, "learning_rate": 7.271774743739545e-06, "loss": 1.7022, "step": 1217 }, { "epoch": 0.08013553300327318, "grad_norm": 0.8975655436515808, "learning_rate": 7.216263653643435e-06, "loss": 1.8082, "step": 1218 }, { "epoch": 0.08020132572330872, "grad_norm": 0.625879168510437, "learning_rate": 7.16094877215725e-06, "loss": 1.472, "step": 1219 }, { "epoch": 0.08026711844334425, "grad_norm": 0.6286988258361816, "learning_rate": 7.105830352958142e-06, "loss": 1.5377, "step": 1220 }, { "epoch": 0.08033291116337977, "grad_norm": 0.6680283546447754, "learning_rate": 7.050908648822291e-06, "loss": 1.6344, "step": 1221 }, { "epoch": 0.0803987038834153, "grad_norm": 0.650658130645752, "learning_rate": 
6.996183911623688e-06, "loss": 1.6916, "step": 1222 }, { "epoch": 0.08046449660345083, "grad_norm": 0.6374256014823914, "learning_rate": 6.941656392333046e-06, "loss": 1.5798, "step": 1223 }, { "epoch": 0.08053028932348635, "grad_norm": 0.6473804116249084, "learning_rate": 6.887326341016636e-06, "loss": 1.433, "step": 1224 }, { "epoch": 0.08059608204352188, "grad_norm": 0.6380757689476013, "learning_rate": 6.833194006835081e-06, "loss": 1.5023, "step": 1225 }, { "epoch": 0.0806618747635574, "grad_norm": 0.6916465759277344, "learning_rate": 6.779259638042318e-06, "loss": 1.6726, "step": 1226 }, { "epoch": 0.08072766748359295, "grad_norm": 0.6720482707023621, "learning_rate": 6.725523481984375e-06, "loss": 1.7212, "step": 1227 }, { "epoch": 0.08079346020362847, "grad_norm": 0.6906450986862183, "learning_rate": 6.671985785098278e-06, "loss": 1.4751, "step": 1228 }, { "epoch": 0.080859252923664, "grad_norm": 0.685138463973999, "learning_rate": 6.618646792910893e-06, "loss": 1.5658, "step": 1229 }, { "epoch": 0.08092504564369953, "grad_norm": 0.7090581059455872, "learning_rate": 6.565506750037836e-06, "loss": 1.5718, "step": 1230 }, { "epoch": 0.08099083836373505, "grad_norm": 0.6716530919075012, "learning_rate": 6.512565900182305e-06, "loss": 1.4331, "step": 1231 }, { "epoch": 0.08105663108377058, "grad_norm": 0.6960655450820923, "learning_rate": 6.459824486134014e-06, "loss": 1.6364, "step": 1232 }, { "epoch": 0.0811224238038061, "grad_norm": 0.7236138582229614, "learning_rate": 6.407282749768029e-06, "loss": 1.5542, "step": 1233 }, { "epoch": 0.08118821652384163, "grad_norm": 0.6596976518630981, "learning_rate": 6.354940932043713e-06, "loss": 1.5349, "step": 1234 }, { "epoch": 0.08125400924387717, "grad_norm": 0.6876804232597351, "learning_rate": 6.302799273003546e-06, "loss": 1.442, "step": 1235 }, { "epoch": 0.0813198019639127, "grad_norm": 0.710265576839447, "learning_rate": 6.2508580117720985e-06, "loss": 1.515, "step": 1236 }, { "epoch": 0.08138559468394822, "grad_norm": 0.7719532251358032, "learning_rate": 6.199117386554926e-06, "loss": 1.7442, "step": 1237 }, { "epoch": 0.08145138740398375, "grad_norm": 0.7328771948814392, "learning_rate": 6.147577634637414e-06, "loss": 1.569, "step": 1238 }, { "epoch": 0.08151718012401928, "grad_norm": 0.7373148202896118, "learning_rate": 6.096238992383752e-06, "loss": 1.5404, "step": 1239 }, { "epoch": 0.0815829728440548, "grad_norm": 0.8095849752426147, "learning_rate": 6.045101695235844e-06, "loss": 1.5851, "step": 1240 }, { "epoch": 0.08164876556409033, "grad_norm": 0.7829951047897339, "learning_rate": 5.994165977712174e-06, "loss": 1.5866, "step": 1241 }, { "epoch": 0.08171455828412587, "grad_norm": 0.7352398633956909, "learning_rate": 5.943432073406796e-06, "loss": 1.5109, "step": 1242 }, { "epoch": 0.0817803510041614, "grad_norm": 0.7571715116500854, "learning_rate": 5.892900214988245e-06, "loss": 1.5424, "step": 1243 }, { "epoch": 0.08184614372419692, "grad_norm": 0.806236207485199, "learning_rate": 5.842570634198452e-06, "loss": 1.6222, "step": 1244 }, { "epoch": 0.08191193644423245, "grad_norm": 0.894612729549408, "learning_rate": 5.792443561851685e-06, "loss": 1.4945, "step": 1245 }, { "epoch": 0.08197772916426797, "grad_norm": 0.8263707756996155, "learning_rate": 5.742519227833509e-06, "loss": 1.5567, "step": 1246 }, { "epoch": 0.0820435218843035, "grad_norm": 0.8109045624732971, "learning_rate": 5.692797861099719e-06, "loss": 1.4979, "step": 1247 }, { "epoch": 0.08210931460433903, "grad_norm": 1.0256260633468628, "learning_rate": 
5.643279689675279e-06, "loss": 1.528, "step": 1248 }, { "epoch": 0.08217510732437455, "grad_norm": 0.9840426445007324, "learning_rate": 5.593964940653296e-06, "loss": 1.6114, "step": 1249 }, { "epoch": 0.08224090004441009, "grad_norm": 1.197351098060608, "learning_rate": 5.544853840193981e-06, "loss": 1.2792, "step": 1250 }, { "epoch": 0.08230669276444562, "grad_norm": 0.5352635383605957, "learning_rate": 5.495946613523567e-06, "loss": 1.7328, "step": 1251 }, { "epoch": 0.08237248548448114, "grad_norm": 0.5567160248756409, "learning_rate": 5.4472434849333396e-06, "loss": 1.6491, "step": 1252 }, { "epoch": 0.08243827820451667, "grad_norm": 0.6188229918479919, "learning_rate": 5.398744677778594e-06, "loss": 1.6258, "step": 1253 }, { "epoch": 0.0825040709245522, "grad_norm": 0.565366804599762, "learning_rate": 5.3504504144775535e-06, "loss": 1.7164, "step": 1254 }, { "epoch": 0.08256986364458772, "grad_norm": 0.5582414269447327, "learning_rate": 5.302360916510424e-06, "loss": 1.6116, "step": 1255 }, { "epoch": 0.08263565636462325, "grad_norm": 0.5899804830551147, "learning_rate": 5.25447640441834e-06, "loss": 1.6297, "step": 1256 }, { "epoch": 0.08270144908465878, "grad_norm": 0.6136707067489624, "learning_rate": 5.206797097802341e-06, "loss": 1.8456, "step": 1257 }, { "epoch": 0.08276724180469432, "grad_norm": 0.5709074139595032, "learning_rate": 5.1593232153223984e-06, "loss": 1.5469, "step": 1258 }, { "epoch": 0.08283303452472984, "grad_norm": 0.5794378519058228, "learning_rate": 5.112054974696395e-06, "loss": 1.594, "step": 1259 }, { "epoch": 0.08289882724476537, "grad_norm": 0.6078215837478638, "learning_rate": 5.064992592699136e-06, "loss": 1.7557, "step": 1260 }, { "epoch": 0.0829646199648009, "grad_norm": 0.6244221925735474, "learning_rate": 5.018136285161329e-06, "loss": 1.6409, "step": 1261 }, { "epoch": 0.08303041268483642, "grad_norm": 0.6333777904510498, "learning_rate": 4.9714862669686335e-06, "loss": 1.6453, "step": 1262 }, { "epoch": 0.08309620540487195, "grad_norm": 0.6196883320808411, "learning_rate": 4.925042752060638e-06, "loss": 1.539, "step": 1263 }, { "epoch": 0.08316199812490747, "grad_norm": 0.6235030293464661, "learning_rate": 4.87880595342991e-06, "loss": 1.6992, "step": 1264 }, { "epoch": 0.083227790844943, "grad_norm": 0.6125441789627075, "learning_rate": 4.832776083120982e-06, "loss": 1.5375, "step": 1265 }, { "epoch": 0.08329358356497854, "grad_norm": 0.6090467572212219, "learning_rate": 4.7869533522294395e-06, "loss": 1.5203, "step": 1266 }, { "epoch": 0.08335937628501407, "grad_norm": 0.6368080377578735, "learning_rate": 4.741337970900866e-06, "loss": 1.6075, "step": 1267 }, { "epoch": 0.0834251690050496, "grad_norm": 0.6555090546607971, "learning_rate": 4.695930148329958e-06, "loss": 1.666, "step": 1268 }, { "epoch": 0.08349096172508512, "grad_norm": 0.6124194264411926, "learning_rate": 4.650730092759542e-06, "loss": 1.5002, "step": 1269 }, { "epoch": 0.08355675444512065, "grad_norm": 0.6861993670463562, "learning_rate": 4.605738011479604e-06, "loss": 1.8197, "step": 1270 }, { "epoch": 0.08362254716515617, "grad_norm": 0.6512139439582825, "learning_rate": 4.560954110826337e-06, "loss": 1.5302, "step": 1271 }, { "epoch": 0.0836883398851917, "grad_norm": 0.6771849393844604, "learning_rate": 4.516378596181237e-06, "loss": 1.5859, "step": 1272 }, { "epoch": 0.08375413260522724, "grad_norm": 0.6660013198852539, "learning_rate": 4.472011671970083e-06, "loss": 1.6946, "step": 1273 }, { "epoch": 0.08381992532526276, "grad_norm": 0.6350880861282349, "learning_rate": 
4.427853541662091e-06, "loss": 1.5967, "step": 1274 }, { "epoch": 0.08388571804529829, "grad_norm": 0.6567019820213318, "learning_rate": 4.383904407768907e-06, "loss": 1.5232, "step": 1275 }, { "epoch": 0.08395151076533382, "grad_norm": 0.6577147245407104, "learning_rate": 4.340164471843722e-06, "loss": 1.5343, "step": 1276 }, { "epoch": 0.08401730348536934, "grad_norm": 0.7278039455413818, "learning_rate": 4.296633934480337e-06, "loss": 1.6968, "step": 1277 }, { "epoch": 0.08408309620540487, "grad_norm": 0.7200085520744324, "learning_rate": 4.253312995312231e-06, "loss": 1.5763, "step": 1278 }, { "epoch": 0.0841488889254404, "grad_norm": 0.6737844944000244, "learning_rate": 4.210201853011652e-06, "loss": 1.5614, "step": 1279 }, { "epoch": 0.08421468164547592, "grad_norm": 0.7159338593482971, "learning_rate": 4.167300705288718e-06, "loss": 1.7838, "step": 1280 }, { "epoch": 0.08428047436551146, "grad_norm": 0.7275623679161072, "learning_rate": 4.12460974889049e-06, "loss": 1.7268, "step": 1281 }, { "epoch": 0.08434626708554699, "grad_norm": 0.6852192878723145, "learning_rate": 4.082129179600097e-06, "loss": 1.4123, "step": 1282 }, { "epoch": 0.08441205980558251, "grad_norm": 0.7260058522224426, "learning_rate": 4.039859192235779e-06, "loss": 1.5387, "step": 1283 }, { "epoch": 0.08447785252561804, "grad_norm": 0.7276716828346252, "learning_rate": 3.99779998065008e-06, "loss": 1.6186, "step": 1284 }, { "epoch": 0.08454364524565357, "grad_norm": 0.8000338077545166, "learning_rate": 3.955951737728902e-06, "loss": 1.544, "step": 1285 }, { "epoch": 0.0846094379656891, "grad_norm": 0.7734615206718445, "learning_rate": 3.914314655390633e-06, "loss": 1.628, "step": 1286 }, { "epoch": 0.08467523068572462, "grad_norm": 0.742764949798584, "learning_rate": 3.872888924585255e-06, "loss": 1.5357, "step": 1287 }, { "epoch": 0.08474102340576015, "grad_norm": 0.7545043230056763, "learning_rate": 3.831674735293506e-06, "loss": 1.6103, "step": 1288 }, { "epoch": 0.08480681612579569, "grad_norm": 0.7599538564682007, "learning_rate": 3.790672276525936e-06, "loss": 1.5277, "step": 1289 }, { "epoch": 0.08487260884583121, "grad_norm": 0.7628370523452759, "learning_rate": 3.749881736322136e-06, "loss": 1.5536, "step": 1290 }, { "epoch": 0.08493840156586674, "grad_norm": 0.7717123031616211, "learning_rate": 3.7093033017497946e-06, "loss": 1.6771, "step": 1291 }, { "epoch": 0.08500419428590227, "grad_norm": 0.7848740220069885, "learning_rate": 3.668937158903901e-06, "loss": 1.4859, "step": 1292 }, { "epoch": 0.08506998700593779, "grad_norm": 0.8771865367889404, "learning_rate": 3.6287834929058295e-06, "loss": 1.5999, "step": 1293 }, { "epoch": 0.08513577972597332, "grad_norm": 0.8170807361602783, "learning_rate": 3.5888424879025495e-06, "loss": 1.4485, "step": 1294 }, { "epoch": 0.08520157244600884, "grad_norm": 0.8586212396621704, "learning_rate": 3.5491143270657446e-06, "loss": 1.6085, "step": 1295 }, { "epoch": 0.08526736516604437, "grad_norm": 0.8808199763298035, "learning_rate": 3.5095991925909845e-06, "loss": 1.4775, "step": 1296 }, { "epoch": 0.08533315788607991, "grad_norm": 0.8514795303344727, "learning_rate": 3.470297265696887e-06, "loss": 1.4806, "step": 1297 }, { "epoch": 0.08539895060611544, "grad_norm": 0.9353112578392029, "learning_rate": 3.4312087266242963e-06, "loss": 1.4445, "step": 1298 }, { "epoch": 0.08546474332615096, "grad_norm": 0.9689911007881165, "learning_rate": 3.3923337546354297e-06, "loss": 1.2047, "step": 1299 }, { "epoch": 0.08553053604618649, "grad_norm": 1.2559093236923218, 
"learning_rate": 3.3536725280130744e-06, "loss": 1.2073, "step": 1300 }, { "epoch": 0.08559632876622202, "grad_norm": 0.5083854794502258, "learning_rate": 3.315225224059809e-06, "loss": 1.5963, "step": 1301 }, { "epoch": 0.08566212148625754, "grad_norm": 0.5269922018051147, "learning_rate": 3.2769920190971027e-06, "loss": 1.5835, "step": 1302 }, { "epoch": 0.08572791420629307, "grad_norm": 0.5758770704269409, "learning_rate": 3.2389730884645807e-06, "loss": 1.6247, "step": 1303 }, { "epoch": 0.08579370692632861, "grad_norm": 0.5545147061347961, "learning_rate": 3.2011686065191895e-06, "loss": 1.6925, "step": 1304 }, { "epoch": 0.08585949964636413, "grad_norm": 0.5974170565605164, "learning_rate": 3.163578746634388e-06, "loss": 1.7166, "step": 1305 }, { "epoch": 0.08592529236639966, "grad_norm": 0.5378652215003967, "learning_rate": 3.1262036811993856e-06, "loss": 1.5484, "step": 1306 }, { "epoch": 0.08599108508643519, "grad_norm": 0.6244592666625977, "learning_rate": 3.0890435816183226e-06, "loss": 1.8135, "step": 1307 }, { "epoch": 0.08605687780647071, "grad_norm": 0.5631170272827148, "learning_rate": 3.0520986183095014e-06, "loss": 1.6121, "step": 1308 }, { "epoch": 0.08612267052650624, "grad_norm": 0.5755576491355896, "learning_rate": 3.0153689607045845e-06, "loss": 1.5121, "step": 1309 }, { "epoch": 0.08618846324654177, "grad_norm": 0.6194965839385986, "learning_rate": 2.9788547772478416e-06, "loss": 1.5921, "step": 1310 }, { "epoch": 0.08625425596657729, "grad_norm": 0.6144405603408813, "learning_rate": 2.9425562353953604e-06, "loss": 1.683, "step": 1311 }, { "epoch": 0.08632004868661283, "grad_norm": 0.6393880844116211, "learning_rate": 2.9064735016142873e-06, "loss": 1.6299, "step": 1312 }, { "epoch": 0.08638584140664836, "grad_norm": 0.6104136109352112, "learning_rate": 2.870606741382059e-06, "loss": 1.6589, "step": 1313 }, { "epoch": 0.08645163412668389, "grad_norm": 0.6379032135009766, "learning_rate": 2.83495611918565e-06, "loss": 1.5917, "step": 1314 }, { "epoch": 0.08651742684671941, "grad_norm": 0.6215010285377502, "learning_rate": 2.7995217985208098e-06, "loss": 1.5812, "step": 1315 }, { "epoch": 0.08658321956675494, "grad_norm": 0.6086938977241516, "learning_rate": 2.7643039418913e-06, "loss": 1.5549, "step": 1316 }, { "epoch": 0.08664901228679046, "grad_norm": 0.6653648018836975, "learning_rate": 2.729302710808196e-06, "loss": 1.4754, "step": 1317 }, { "epoch": 0.08671480500682599, "grad_norm": 0.6537653803825378, "learning_rate": 2.6945182657891034e-06, "loss": 1.7067, "step": 1318 }, { "epoch": 0.08678059772686152, "grad_norm": 0.6637395620346069, "learning_rate": 2.6599507663574384e-06, "loss": 1.5416, "step": 1319 }, { "epoch": 0.08684639044689706, "grad_norm": 0.6458157300949097, "learning_rate": 2.6256003710416864e-06, "loss": 1.5356, "step": 1320 }, { "epoch": 0.08691218316693258, "grad_norm": 0.6820713877677917, "learning_rate": 2.5914672373746674e-06, "loss": 1.797, "step": 1321 }, { "epoch": 0.08697797588696811, "grad_norm": 0.6089844107627869, "learning_rate": 2.5575515218928592e-06, "loss": 1.5584, "step": 1322 }, { "epoch": 0.08704376860700364, "grad_norm": 0.7012182474136353, "learning_rate": 2.5238533801356324e-06, "loss": 1.6888, "step": 1323 }, { "epoch": 0.08710956132703916, "grad_norm": 0.6670122742652893, "learning_rate": 2.490372966644544e-06, "loss": 1.6455, "step": 1324 }, { "epoch": 0.08717535404707469, "grad_norm": 0.6817238330841064, "learning_rate": 2.457110434962645e-06, "loss": 1.575, "step": 1325 }, { "epoch": 0.08724114676711021, "grad_norm": 
0.6881915926933289, "learning_rate": 2.424065937633768e-06, "loss": 1.7438, "step": 1326 }, { "epoch": 0.08730693948714574, "grad_norm": 0.6820628643035889, "learning_rate": 2.3912396262018357e-06, "loss": 1.5352, "step": 1327 }, { "epoch": 0.08737273220718128, "grad_norm": 0.7019688487052917, "learning_rate": 2.3586316512101416e-06, "loss": 1.6225, "step": 1328 }, { "epoch": 0.0874385249272168, "grad_norm": 0.6856455206871033, "learning_rate": 2.3262421622006868e-06, "loss": 1.7121, "step": 1329 }, { "epoch": 0.08750431764725233, "grad_norm": 0.7237346768379211, "learning_rate": 2.29407130771348e-06, "loss": 1.6317, "step": 1330 }, { "epoch": 0.08757011036728786, "grad_norm": 0.728233814239502, "learning_rate": 2.26211923528587e-06, "loss": 1.5668, "step": 1331 }, { "epoch": 0.08763590308732339, "grad_norm": 0.6574517488479614, "learning_rate": 2.2303860914518306e-06, "loss": 1.496, "step": 1332 }, { "epoch": 0.08770169580735891, "grad_norm": 0.7108151912689209, "learning_rate": 2.1988720217413494e-06, "loss": 1.4753, "step": 1333 }, { "epoch": 0.08776748852739444, "grad_norm": 0.7035221457481384, "learning_rate": 2.1675771706797132e-06, "loss": 1.4783, "step": 1334 }, { "epoch": 0.08783328124742998, "grad_norm": 0.713604211807251, "learning_rate": 2.136501681786862e-06, "loss": 1.6723, "step": 1335 }, { "epoch": 0.0878990739674655, "grad_norm": 0.7584853768348694, "learning_rate": 2.10564569757673e-06, "loss": 1.457, "step": 1336 }, { "epoch": 0.08796486668750103, "grad_norm": 0.7580429911613464, "learning_rate": 2.0750093595565733e-06, "loss": 1.597, "step": 1337 }, { "epoch": 0.08803065940753656, "grad_norm": 0.7131192684173584, "learning_rate": 2.0445928082263645e-06, "loss": 1.5909, "step": 1338 }, { "epoch": 0.08809645212757208, "grad_norm": 0.7904130816459656, "learning_rate": 2.01439618307811e-06, "loss": 1.5464, "step": 1339 }, { "epoch": 0.08816224484760761, "grad_norm": 0.8404092192649841, "learning_rate": 1.984419622595224e-06, "loss": 1.616, "step": 1340 }, { "epoch": 0.08822803756764314, "grad_norm": 0.7262335419654846, "learning_rate": 1.954663264251888e-06, "loss": 1.5096, "step": 1341 }, { "epoch": 0.08829383028767866, "grad_norm": 0.8143447041511536, "learning_rate": 1.925127244512426e-06, "loss": 1.4777, "step": 1342 }, { "epoch": 0.0883596230077142, "grad_norm": 0.8100689053535461, "learning_rate": 1.895811698830685e-06, "loss": 1.4577, "step": 1343 }, { "epoch": 0.08842541572774973, "grad_norm": 0.8460294008255005, "learning_rate": 1.8667167616493896e-06, "loss": 1.5934, "step": 1344 }, { "epoch": 0.08849120844778526, "grad_norm": 0.8502865433692932, "learning_rate": 1.8378425663995559e-06, "loss": 1.435, "step": 1345 }, { "epoch": 0.08855700116782078, "grad_norm": 0.9002560377120972, "learning_rate": 1.8091892454998594e-06, "loss": 1.459, "step": 1346 }, { "epoch": 0.08862279388785631, "grad_norm": 0.8573265075683594, "learning_rate": 1.7807569303560367e-06, "loss": 1.5029, "step": 1347 }, { "epoch": 0.08868858660789183, "grad_norm": 0.8953090310096741, "learning_rate": 1.7525457513602683e-06, "loss": 1.4322, "step": 1348 }, { "epoch": 0.08875437932792736, "grad_norm": 0.9949759244918823, "learning_rate": 1.7245558378906013e-06, "loss": 1.4967, "step": 1349 }, { "epoch": 0.08882017204796289, "grad_norm": 1.1277623176574707, "learning_rate": 1.6967873183103556e-06, "loss": 1.0865, "step": 1350 }, { "epoch": 0.08888596476799843, "grad_norm": 0.5030365586280823, "learning_rate": 1.6692403199675078e-06, "loss": 1.6924, "step": 1351 }, { "epoch": 0.08895175748803395, 
"grad_norm": 0.5737792253494263, "learning_rate": 1.641914969194147e-06, "loss": 1.7576, "step": 1352 }, { "epoch": 0.08901755020806948, "grad_norm": 0.5396728515625, "learning_rate": 1.6148113913058427e-06, "loss": 1.4578, "step": 1353 }, { "epoch": 0.089083342928105, "grad_norm": 0.535679280757904, "learning_rate": 1.587929710601127e-06, "loss": 1.5822, "step": 1354 }, { "epoch": 0.08914913564814053, "grad_norm": 0.5770007371902466, "learning_rate": 1.5612700503608968e-06, "loss": 1.7699, "step": 1355 }, { "epoch": 0.08921492836817606, "grad_norm": 0.5972631573677063, "learning_rate": 1.5348325328478408e-06, "loss": 1.6394, "step": 1356 }, { "epoch": 0.08928072108821158, "grad_norm": 0.5818490386009216, "learning_rate": 1.5086172793059017e-06, "loss": 1.7062, "step": 1357 }, { "epoch": 0.08934651380824711, "grad_norm": 0.6017389893531799, "learning_rate": 1.4826244099596986e-06, "loss": 1.6664, "step": 1358 }, { "epoch": 0.08941230652828265, "grad_norm": 0.6041622161865234, "learning_rate": 1.4568540440139777e-06, "loss": 1.7427, "step": 1359 }, { "epoch": 0.08947809924831818, "grad_norm": 0.8328604698181152, "learning_rate": 1.4313062996530847e-06, "loss": 1.5239, "step": 1360 }, { "epoch": 0.0895438919683537, "grad_norm": 0.6048279404640198, "learning_rate": 1.4059812940404093e-06, "loss": 1.7337, "step": 1361 }, { "epoch": 0.08960968468838923, "grad_norm": 0.6227853894233704, "learning_rate": 1.3808791433178369e-06, "loss": 1.451, "step": 1362 }, { "epoch": 0.08967547740842476, "grad_norm": 0.5862622857093811, "learning_rate": 1.3559999626052477e-06, "loss": 1.4699, "step": 1363 }, { "epoch": 0.08974127012846028, "grad_norm": 0.6687149405479431, "learning_rate": 1.33134386599994e-06, "loss": 1.5708, "step": 1364 }, { "epoch": 0.08980706284849581, "grad_norm": 0.65779709815979, "learning_rate": 1.3069109665761693e-06, "loss": 1.6006, "step": 1365 }, { "epoch": 0.08987285556853135, "grad_norm": 0.6536720991134644, "learning_rate": 1.2827013763845707e-06, "loss": 1.6428, "step": 1366 }, { "epoch": 0.08993864828856687, "grad_norm": 0.6218163967132568, "learning_rate": 1.2587152064516827e-06, "loss": 1.5861, "step": 1367 }, { "epoch": 0.0900044410086024, "grad_norm": 0.6612845063209534, "learning_rate": 1.2349525667794293e-06, "loss": 1.7162, "step": 1368 }, { "epoch": 0.09007023372863793, "grad_norm": 0.6771432757377625, "learning_rate": 1.211413566344599e-06, "loss": 1.7318, "step": 1369 }, { "epoch": 0.09013602644867345, "grad_norm": 0.6161140203475952, "learning_rate": 1.1880983130983626e-06, "loss": 1.5076, "step": 1370 }, { "epoch": 0.09020181916870898, "grad_norm": 0.6894911527633667, "learning_rate": 1.1650069139657826e-06, "loss": 1.7694, "step": 1371 }, { "epoch": 0.0902676118887445, "grad_norm": 0.6760029196739197, "learning_rate": 1.1421394748453108e-06, "loss": 1.6852, "step": 1372 }, { "epoch": 0.09033340460878003, "grad_norm": 0.716984748840332, "learning_rate": 1.1194961006082972e-06, "loss": 1.4935, "step": 1373 }, { "epoch": 0.09039919732881557, "grad_norm": 0.6679751873016357, "learning_rate": 1.0970768950985199e-06, "loss": 1.7724, "step": 1374 }, { "epoch": 0.0904649900488511, "grad_norm": 0.6891523599624634, "learning_rate": 1.074881961131724e-06, "loss": 1.5886, "step": 1375 }, { "epoch": 0.09053078276888663, "grad_norm": 0.6760563254356384, "learning_rate": 1.0529114004951047e-06, "loss": 1.632, "step": 1376 }, { "epoch": 0.09059657548892215, "grad_norm": 0.6902234554290771, "learning_rate": 1.0311653139468969e-06, "loss": 1.6434, "step": 1377 }, { "epoch": 
0.09066236820895768, "grad_norm": 0.6432000398635864, "learning_rate": 1.0096438012158539e-06, "loss": 1.5294, "step": 1378 }, { "epoch": 0.0907281609289932, "grad_norm": 0.6482113599777222, "learning_rate": 9.883469610008577e-07, "loss": 1.5759, "step": 1379 }, { "epoch": 0.09079395364902873, "grad_norm": 0.7423402070999146, "learning_rate": 9.672748909703934e-07, "loss": 1.697, "step": 1380 }, { "epoch": 0.09085974636906426, "grad_norm": 0.7108086943626404, "learning_rate": 9.46427687762158e-07, "loss": 1.6335, "step": 1381 }, { "epoch": 0.0909255390890998, "grad_norm": 0.6593964695930481, "learning_rate": 9.258054469825972e-07, "loss": 1.5451, "step": 1382 }, { "epoch": 0.09099133180913532, "grad_norm": 0.7182891964912415, "learning_rate": 9.054082632064642e-07, "loss": 1.4827, "step": 1383 }, { "epoch": 0.09105712452917085, "grad_norm": 0.7174695730209351, "learning_rate": 8.852362299763772e-07, "loss": 1.652, "step": 1384 }, { "epoch": 0.09112291724920638, "grad_norm": 0.7062637209892273, "learning_rate": 8.652894398024136e-07, "loss": 1.478, "step": 1385 }, { "epoch": 0.0911887099692419, "grad_norm": 0.737032949924469, "learning_rate": 8.455679841616659e-07, "loss": 1.6173, "step": 1386 }, { "epoch": 0.09125450268927743, "grad_norm": 0.8026255369186401, "learning_rate": 8.260719534978368e-07, "loss": 1.5671, "step": 1387 }, { "epoch": 0.09132029540931295, "grad_norm": 0.7752266526222229, "learning_rate": 8.06801437220811e-07, "loss": 1.7083, "step": 1388 }, { "epoch": 0.0913860881293485, "grad_norm": 0.7705795764923096, "learning_rate": 7.877565237062623e-07, "loss": 1.4577, "step": 1389 }, { "epoch": 0.09145188084938402, "grad_norm": 0.799823522567749, "learning_rate": 7.689373002952305e-07, "loss": 1.7553, "step": 1390 }, { "epoch": 0.09151767356941955, "grad_norm": 0.7618536353111267, "learning_rate": 7.503438532937168e-07, "loss": 1.4927, "step": 1391 }, { "epoch": 0.09158346628945507, "grad_norm": 0.9011740684509277, "learning_rate": 7.319762679723174e-07, "loss": 1.6078, "step": 1392 }, { "epoch": 0.0916492590094906, "grad_norm": 0.8119943737983704, "learning_rate": 7.138346285658071e-07, "loss": 1.3945, "step": 1393 }, { "epoch": 0.09171505172952613, "grad_norm": 0.8282358050346375, "learning_rate": 6.959190182727615e-07, "loss": 1.6105, "step": 1394 }, { "epoch": 0.09178084444956165, "grad_norm": 0.8864809274673462, "learning_rate": 6.782295192551691e-07, "loss": 1.6667, "step": 1395 }, { "epoch": 0.09184663716959718, "grad_norm": 0.8417677879333496, "learning_rate": 6.607662126380587e-07, "loss": 1.513, "step": 1396 }, { "epoch": 0.09191242988963272, "grad_norm": 0.8300901651382446, "learning_rate": 6.43529178509139e-07, "loss": 1.3415, "step": 1397 }, { "epoch": 0.09197822260966824, "grad_norm": 0.9474268555641174, "learning_rate": 6.265184959184101e-07, "loss": 1.56, "step": 1398 }, { "epoch": 0.09204401532970377, "grad_norm": 1.1085734367370605, "learning_rate": 6.097342428778185e-07, "loss": 1.4722, "step": 1399 }, { "epoch": 0.0921098080497393, "grad_norm": 1.2595409154891968, "learning_rate": 5.931764963608866e-07, "loss": 1.2695, "step": 1400 }, { "epoch": 0.09217560076977482, "grad_norm": 0.4739496409893036, "learning_rate": 5.768453323023615e-07, "loss": 1.5488, "step": 1401 }, { "epoch": 0.09224139348981035, "grad_norm": 0.550033688545227, "learning_rate": 5.60740825597883e-07, "loss": 1.761, "step": 1402 }, { "epoch": 0.09230718620984588, "grad_norm": 0.5993760824203491, "learning_rate": 5.448630501036112e-07, "loss": 1.8681, "step": 1403 }, { "epoch": 
0.0923729789298814, "grad_norm": 0.5492662787437439, "learning_rate": 5.292120786359267e-07, "loss": 1.6244, "step": 1404 }, { "epoch": 0.09243877164991694, "grad_norm": 0.5324863195419312, "learning_rate": 5.137879829710424e-07, "loss": 1.579, "step": 1405 }, { "epoch": 0.09250456436995247, "grad_norm": 0.5814157724380493, "learning_rate": 4.985908338447476e-07, "loss": 1.6163, "step": 1406 }, { "epoch": 0.092570357089988, "grad_norm": 0.5995772480964661, "learning_rate": 4.836207009519977e-07, "loss": 1.657, "step": 1407 }, { "epoch": 0.09263614981002352, "grad_norm": 0.6003164649009705, "learning_rate": 4.688776529466754e-07, "loss": 1.6132, "step": 1408 }, { "epoch": 0.09270194253005905, "grad_norm": 0.5804505348205566, "learning_rate": 4.543617574412184e-07, "loss": 1.5816, "step": 1409 }, { "epoch": 0.09276773525009457, "grad_norm": 0.5852599143981934, "learning_rate": 4.4007308100633136e-07, "loss": 1.6799, "step": 1410 }, { "epoch": 0.0928335279701301, "grad_norm": 0.6366518139839172, "learning_rate": 4.2601168917069114e-07, "loss": 1.7649, "step": 1411 }, { "epoch": 0.09289932069016563, "grad_norm": 0.5954082608222961, "learning_rate": 4.121776464206251e-07, "loss": 1.6245, "step": 1412 }, { "epoch": 0.09296511341020117, "grad_norm": 0.6634638905525208, "learning_rate": 3.9857101619982797e-07, "loss": 1.7316, "step": 1413 }, { "epoch": 0.09303090613023669, "grad_norm": 0.6314533352851868, "learning_rate": 3.851918609090677e-07, "loss": 1.6691, "step": 1414 }, { "epoch": 0.09309669885027222, "grad_norm": 0.6321081519126892, "learning_rate": 3.720402419058966e-07, "loss": 1.4713, "step": 1415 }, { "epoch": 0.09316249157030775, "grad_norm": 0.6127361059188843, "learning_rate": 3.5911621950438514e-07, "loss": 1.5676, "step": 1416 }, { "epoch": 0.09322828429034327, "grad_norm": 0.6360514760017395, "learning_rate": 3.464198529748108e-07, "loss": 1.5381, "step": 1417 }, { "epoch": 0.0932940770103788, "grad_norm": 0.6701735258102417, "learning_rate": 3.339512005434309e-07, "loss": 1.6244, "step": 1418 }, { "epoch": 0.09335986973041432, "grad_norm": 0.6812663674354553, "learning_rate": 3.2171031939217666e-07, "loss": 1.6387, "step": 1419 }, { "epoch": 0.09342566245044986, "grad_norm": 0.6711384057998657, "learning_rate": 3.0969726565842074e-07, "loss": 1.5678, "step": 1420 }, { "epoch": 0.09349145517048539, "grad_norm": 0.7010900974273682, "learning_rate": 2.979120944346936e-07, "loss": 1.6543, "step": 1421 }, { "epoch": 0.09355724789052092, "grad_norm": 0.6457873582839966, "learning_rate": 2.863548597684562e-07, "loss": 1.6793, "step": 1422 }, { "epoch": 0.09362304061055644, "grad_norm": 0.6923664808273315, "learning_rate": 2.750256146618335e-07, "loss": 1.5873, "step": 1423 }, { "epoch": 0.09368883333059197, "grad_norm": 0.6824166178703308, "learning_rate": 2.639244110713701e-07, "loss": 1.5315, "step": 1424 }, { "epoch": 0.0937546260506275, "grad_norm": 0.6563824415206909, "learning_rate": 2.5305129990781387e-07, "loss": 1.604, "step": 1425 }, { "epoch": 0.09382041877066302, "grad_norm": 0.6801635026931763, "learning_rate": 2.424063310358604e-07, "loss": 1.5743, "step": 1426 }, { "epoch": 0.09388621149069855, "grad_norm": 0.7070457935333252, "learning_rate": 2.319895532739369e-07, "loss": 1.5733, "step": 1427 }, { "epoch": 0.09395200421073409, "grad_norm": 0.661389172077179, "learning_rate": 2.218010143939575e-07, "loss": 1.4381, "step": 1428 }, { "epoch": 0.09401779693076961, "grad_norm": 0.7122326493263245, "learning_rate": 2.1184076112114038e-07, "loss": 1.5635, "step": 1429 }, { 
"epoch": 0.09408358965080514, "grad_norm": 0.6946954131126404, "learning_rate": 2.0210883913376334e-07, "loss": 1.558, "step": 1430 }, { "epoch": 0.09414938237084067, "grad_norm": 0.7263256311416626, "learning_rate": 1.9260529306296404e-07, "loss": 1.6198, "step": 1431 }, { "epoch": 0.0942151750908762, "grad_norm": 0.7192728519439697, "learning_rate": 1.833301664925402e-07, "loss": 1.6123, "step": 1432 }, { "epoch": 0.09428096781091172, "grad_norm": 0.6984828114509583, "learning_rate": 1.742835019587441e-07, "loss": 1.5176, "step": 1433 }, { "epoch": 0.09434676053094725, "grad_norm": 0.7076666951179504, "learning_rate": 1.6546534095007172e-07, "loss": 1.5039, "step": 1434 }, { "epoch": 0.09441255325098277, "grad_norm": 0.7117300629615784, "learning_rate": 1.5687572390711835e-07, "loss": 1.5169, "step": 1435 }, { "epoch": 0.09447834597101831, "grad_norm": 0.7957559823989868, "learning_rate": 1.4851469022234e-07, "loss": 1.6177, "step": 1436 }, { "epoch": 0.09454413869105384, "grad_norm": 0.7519058585166931, "learning_rate": 1.403822782399089e-07, "loss": 1.5329, "step": 1437 }, { "epoch": 0.09460993141108937, "grad_norm": 0.7622172236442566, "learning_rate": 1.324785252555194e-07, "loss": 1.3998, "step": 1438 }, { "epoch": 0.09467572413112489, "grad_norm": 0.767694354057312, "learning_rate": 1.2480346751622686e-07, "loss": 1.5882, "step": 1439 }, { "epoch": 0.09474151685116042, "grad_norm": 0.7719693183898926, "learning_rate": 1.1735714022027555e-07, "loss": 1.3252, "step": 1440 }, { "epoch": 0.09480730957119594, "grad_norm": 0.7426503896713257, "learning_rate": 1.1013957751693782e-07, "loss": 1.5377, "step": 1441 }, { "epoch": 0.09487310229123147, "grad_norm": 0.7355154156684875, "learning_rate": 1.0315081250636405e-07, "loss": 1.424, "step": 1442 }, { "epoch": 0.094938895011267, "grad_norm": 0.8036089539527893, "learning_rate": 9.63908772394162e-08, "loss": 1.605, "step": 1443 }, { "epoch": 0.09500468773130254, "grad_norm": 0.878667950630188, "learning_rate": 8.985980271754013e-08, "loss": 1.5414, "step": 1444 }, { "epoch": 0.09507048045133806, "grad_norm": 0.9153657555580139, "learning_rate": 8.355761889260461e-08, "loss": 1.5492, "step": 1445 }, { "epoch": 0.09513627317137359, "grad_norm": 0.8688125014305115, "learning_rate": 7.748435466678471e-08, "loss": 1.525, "step": 1446 }, { "epoch": 0.09520206589140912, "grad_norm": 0.9009868502616882, "learning_rate": 7.164003789240648e-08, "loss": 1.4909, "step": 1447 }, { "epoch": 0.09526785861144464, "grad_norm": 0.947225034236908, "learning_rate": 6.602469537183021e-08, "loss": 1.4103, "step": 1448 }, { "epoch": 0.09533365133148017, "grad_norm": 1.0838871002197266, "learning_rate": 6.063835285733955e-08, "loss": 1.5726, "step": 1449 }, { "epoch": 0.0953994440515157, "grad_norm": 1.4739456176757812, "learning_rate": 5.5481035050991556e-08, "loss": 1.0046, "step": 1450 }, { "epoch": 0.09546523677155123, "grad_norm": 0.5145021677017212, "learning_rate": 5.0552765604544584e-08, "loss": 1.7385, "step": 1451 }, { "epoch": 0.09553102949158676, "grad_norm": 0.5236654877662659, "learning_rate": 4.585356711931388e-08, "loss": 1.7165, "step": 1452 }, { "epoch": 0.09559682221162229, "grad_norm": 0.5186619758605957, "learning_rate": 4.138346114608283e-08, "loss": 1.4829, "step": 1453 }, { "epoch": 0.09566261493165781, "grad_norm": 0.5814453959465027, "learning_rate": 3.7142468185014104e-08, "loss": 1.6926, "step": 1454 }, { "epoch": 0.09572840765169334, "grad_norm": 0.5442173480987549, "learning_rate": 3.313060768553866e-08, "loss": 1.6347, "step": 1455 
}, { "epoch": 0.09579420037172887, "grad_norm": 0.5665310025215149, "learning_rate": 2.9347898046266918e-08, "loss": 1.5986, "step": 1456 }, { "epoch": 0.09585999309176439, "grad_norm": 0.5988601446151733, "learning_rate": 2.5794356614922134e-08, "loss": 1.8659, "step": 1457 }, { "epoch": 0.09592578581179992, "grad_norm": 0.5948224067687988, "learning_rate": 2.2469999688246035e-08, "loss": 1.7225, "step": 1458 }, { "epoch": 0.09599157853183546, "grad_norm": 0.6055050492286682, "learning_rate": 1.937484251192112e-08, "loss": 1.6482, "step": 1459 }, { "epoch": 0.09605737125187098, "grad_norm": 0.6156303286552429, "learning_rate": 1.6508899280515134e-08, "loss": 1.76, "step": 1460 }, { "epoch": 0.09612316397190651, "grad_norm": 0.5953572988510132, "learning_rate": 1.3872183137397799e-08, "loss": 1.6664, "step": 1461 }, { "epoch": 0.09618895669194204, "grad_norm": 0.6339824199676514, "learning_rate": 1.1464706174701967e-08, "loss": 1.5749, "step": 1462 }, { "epoch": 0.09625474941197756, "grad_norm": 0.598288357257843, "learning_rate": 9.286479433257e-09, "loss": 1.5851, "step": 1463 }, { "epoch": 0.09632054213201309, "grad_norm": 0.6078794002532959, "learning_rate": 7.337512902522159e-09, "loss": 1.6656, "step": 1464 }, { "epoch": 0.09638633485204862, "grad_norm": 0.6280359029769897, "learning_rate": 5.6178155205754975e-09, "loss": 1.6216, "step": 1465 }, { "epoch": 0.09645212757208414, "grad_norm": 0.5920078754425049, "learning_rate": 4.127395174036153e-09, "loss": 1.5143, "step": 1466 }, { "epoch": 0.09651792029211968, "grad_norm": 0.6147146224975586, "learning_rate": 2.866258698064339e-09, "loss": 1.559, "step": 1467 }, { "epoch": 0.09658371301215521, "grad_norm": 0.6601701974868774, "learning_rate": 1.8344118763002903e-09, "loss": 1.593, "step": 1468 }, { "epoch": 0.09664950573219074, "grad_norm": 0.6434551477432251, "learning_rate": 1.0318594408476045e-09, "loss": 1.5645, "step": 1469 }, { "epoch": 0.09671529845222626, "grad_norm": 0.6171491146087646, "learning_rate": 4.586050722621416e-10, "loss": 1.589, "step": 1470 }, { "epoch": 0.09678109117226179, "grad_norm": 0.6035428047180176, "learning_rate": 1.1465139951316595e-10, "loss": 1.5367, "step": 1471 }, { "epoch": 0.09684688389229731, "grad_norm": 0.6608728170394897, "learning_rate": 0.0, "loss": 1.517, "step": 1472 } ], "logging_steps": 1, "max_steps": 1472, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 368, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9072999168294257e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }